aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunyang Shao <shaojunyang@google.com>2026-01-07 20:06:48 +0000
committerJunyang Shao <shaojunyang@google.com>2026-01-07 20:06:49 +0000
commitb8191a2f9893220bdbe52ecebb37e293847d98f5 (patch)
treeffaec06811834d36737d182a65831d65cd8ce798
parentc599a8f2385849a225d02843b3c6389dbfc5aa69 (diff)
parentf6ebd91129e13ef7f495550a4fc8fa74769f6a2d (diff)
downloadgo-b8191a2f9893220bdbe52ecebb37e293847d98f5.tar.xz
[release-branch.go1.26] all: merge master (f6ebd91) into release-branch.go1.26
Merge List: + 2026-01-07 f6ebd91129 all: update vendored x/tools + 2026-01-06 d1d0fc7a97 os/exec: avoid atomic.Bool for Cmd.startCalled + 2026-01-05 9b2e3b9a02 simd/archsimd: use V(P)MOVMSK for mask ToBits if possible + 2026-01-02 f8ee0f8475 cmd/go/testdata/vcstest/git: use git commands that work on older git versions + 2026-01-02 b094749bad test/codegen: codify bit related code generation for arm64 + 2026-01-02 e84983fa40 cmd/compile: optimize SIMD IsNaN.Or(IsNaN) + 2026-01-02 8244b85677 simd/archsimd: add tests for IsNaN + 2026-01-02 13440fb518 simd/archsimd: make IsNaN unary + 2026-01-02 c3550b3352 simd/archsimd: correct documentation of Mask types + 2026-01-02 34ad26341d net/rpc: correct comment for isExportedOrBuiltinType function + 2025-12-30 b28808d838 cmd/go/internal/modindex: fix obvious bug using failed type assertion + 2025-12-30 d64add4d60 simd/archsimd: adjust documentations slightly + 2025-12-30 1843cfbcd6 runtime/secret: make tests more sturdy + 2025-12-30 fd45d70799 all: fix some minor grammatical issues in the comments + 2025-12-30 df4e08ac65 test/codegen: fix a tab in comparisons.go to ensure pattern works + 2025-12-30 cd668d744f cmd/compile: disable inlining for functions using runtime.deferrangefunc + 2025-12-29 06eff0f7c3 simd/archsimd: add tests for Saturate-Concat operations + 2025-12-29 110aaf7137 simd/archsimd: add tests for Saturate operations + 2025-12-29 22e7b94e7f simd/archsimd: add tests for ExtendLo operations + 2025-12-29 76dddce293 simd/archsimd: remove redundant suffix of ExtendLo operations + 2025-12-29 6ecdd2fc6e simd/archsimd: add more tests for Convert operations + 2025-12-29 e0c99fe285 simd/archsimd: add more tests for Truncate operations + 2025-12-29 08369369e5 reflect: document Call/CallSlice panic when v is unexported field + 2025-12-29 ca8effbde1 internal/coverage/decodemeta: correct wording in unknown version error + 2025-12-29 0b06b68e21 encoding/gob: clarify docs about pointers to zero values not being sent + 2025-12-29 
9cb3edbfe9 regexp: standardize error message format in find_test.go + 2025-12-29 b3ed0627ce tests: improve consistency and clarity of test diagnostics + 2025-12-29 3dcb48d298 test: follow got/want convention in uintptrescapes test + 2025-12-29 f7b7e94b0a test: clarify log message for surrogate UTF-8 check + 2025-12-29 e790d59674 simd/archsimd: add tests for Truncate operations + 2025-12-27 f4cec7917c cmd: fix unused errors reported by ineffassign + 2025-12-27 ca13fe02c4 simd/archsimd: add more tests for Convert operations + 2025-12-27 037c047f2c simd/archsimd: add more tests for Extend operations + 2025-12-26 7971fcdf53 test/codegen: tidy tests for bits + 2025-12-24 0f620776d7 simd/archsimd: fix "go generate" command + 2025-12-24 a5fe8c07ae simd/archsimd: guard test helpers with amd64 tag + 2025-12-23 a23d1a4ebe bytes: improve consistency in split test messages + 2025-12-23 866e461b96 cmd/go: update pkgsite doc command to v0.0.0-20251223195805-1a3bd3c788fe + 2025-12-23 08dc8393d7 time: skip test that will fail with GO111MODULE=off + 2025-12-23 43ebed88cc runtime: improve a log message in TestCleanupLost + 2025-12-23 81283ad339 runtime: fix nGsyscallNoP accounting + 2025-12-23 3e0e1667f6 test/codegen: codify bit related code generation for riscv64 + 2025-12-23 3faf988f21 errors: add a test verifying join does not flatten errors + 2025-12-23 2485a0bc2c cmd/asm/internal/asm: run riscv64 end-to-end tests for each profile + 2025-12-23 8254d66eab cmd/asm/internal/asm: abort end to end test if assembly failed + 2025-12-23 1b3db48db7 Revert "errors: optimize errors.Join for single unwrappable errors" + 2025-12-23 b6b8b2fe6e cmd/compile: handle propagating an out-of-range jump table index + 2025-12-22 2cd0371a0a debug/pe: avoid panic in File.ImportedSymbols + 2025-12-22 91435be153 runtime: revert entry point on freebsd/arm64 + 2025-12-22 c1efada1d2 simd/archsimd: correct documentation for pairwise operations + 2025-12-22 3d77a0b15e os/exec: second call to Cmd.Start is 
always an error + 2025-12-20 7ecb1f36ac simd/archsimd: add HasAVX2() guards to tests that need them + 2025-12-19 70c22e0ad7 simd/archsimd: delete DotProductQuadruple methods for now + 2025-12-19 42cda7c1df simd/archsimd: add Grouped for 256- and 512-bit SaturateTo(U)Int16Concat, and fix type + 2025-12-19 baa0ae3aaa simd/archsimd: correct type and instruction for SaturateToUint8 + 2025-12-19 d46c58debb go/doc: link to struct fields in the same package + 2025-12-19 25ed6c7f9b cmd/go/internal/doc: update pkgsite version + 2025-12-19 4411edf972 simd/archsimd: reword documentation for some operations + 2025-12-19 7d9418a19c simd/archsimd: reword documentation of comparison operations + 2025-12-18 d00e96d3ae internal/cpu: repair VNNI feature check + 2025-12-18 cfc024daeb simd/archsimd: reword documentation for conversion ops + 2025-12-17 ad91f5d241 simd/archsimd: reword documentation of shfit operations + 2025-12-17 b8c4cc63e7 runtime: keep track of secret allocation size + 2025-12-17 8564fede89 cmd/go: remove reference to no longer existing -i flag + 2025-12-17 eecdb61eeb crypto: rename fips140v2.0 to fips140v1.26 + 2025-12-17 05e41225f6 simd/archsimd: reword documentation of As methods + 2025-12-17 516699848b runtime/secret: warn users about allocations, loosen guarantees + 2025-12-16 8c28ab936a cmd/cgo: don't emit C local if it is not used + 2025-12-16 65b71c11d4 crypto/internal/fips140only: test fips140=only mode + 2025-12-16 ea1aa76554 go/doc: exclude examples with results + 2025-12-16 5046bdf8a6 crypto/tls: reject trailing messages after client/server hello + 2025-12-16 3f6eabdf09 cmd/compile: use unsigned constant when folding loads for SIMD ops with constants + 2025-12-16 a4b5b92055 cmd/dist: preserve existing GOEXPERIMENTs when running tests with additional experiments Change-Id: I84ad4ceba344761142b98587c07d186cf2d638ff
-rw-r--r--src/archive/tar/reader_test.go2
-rw-r--r--src/archive/tar/stat_unix.go2
-rw-r--r--src/archive/tar/strconv.go2
-rw-r--r--src/bytes/bytes_test.go4
-rw-r--r--src/cmd/asm/internal/asm/endtoend_test.go36
-rw-r--r--src/cmd/cgo/internal/test/issue76861.go12
-rw-r--r--src/cmd/cgo/internal/test/issue76861/a.go13
-rw-r--r--src/cmd/cgo/out.go12
-rw-r--r--src/cmd/compile/internal/amd64/simdssa.go98
-rw-r--r--src/cmd/compile/internal/amd64/ssa.go8
-rw-r--r--src/cmd/compile/internal/inline/inl.go3
-rw-r--r--src/cmd/compile/internal/ssa/_gen/AMD64.rules45
-rw-r--r--src/cmd/compile/internal/ssa/_gen/AMD64Ops.go9
-rw-r--r--src/cmd/compile/internal/ssa/_gen/genericOps.go8
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdAMD64.rules438
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go46
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdgenericOps.go108
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go1150
-rw-r--r--src/cmd/compile/internal/ssa/rewriteAMD64.go1884
-rw-r--r--src/cmd/compile/internal/ssa/sccp.go4
-rw-r--r--src/cmd/compile/internal/ssa/tern_helpers.go2
-rw-r--r--src/cmd/compile/internal/ssagen/intrinsics.go6
-rw-r--r--src/cmd/compile/internal/ssagen/simdintrinsics.go108
-rw-r--r--src/cmd/compile/testdata/script/issue77033.txt40
-rw-r--r--src/cmd/dist/test.go22
-rw-r--r--src/cmd/go.mod2
-rw-r--r--src/cmd/go.sum4
-rw-r--r--src/cmd/go/alldocs.go2
-rw-r--r--src/cmd/go/internal/doc/pkgsite.go2
-rw-r--r--src/cmd/go/internal/modindex/scan.go6
-rw-r--r--src/cmd/go/internal/test/test.go2
-rw-r--r--src/cmd/go/testdata/script/list_empty_importpath.txt11
-rw-r--r--src/cmd/go/testdata/vcstest/git/legacytest.txt2
-rw-r--r--src/cmd/internal/bootstrap_test/overlaydir_test.go3
-rw-r--r--src/cmd/link/link_test.go3
-rw-r--r--src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go2
-rw-r--r--src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go21
-rw-r--r--src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go2
-rw-r--r--src/cmd/vendor/modules.txt2
-rw-r--r--src/crypto/cipher/gcm_fips140v1.26_test.go (renamed from src/crypto/cipher/gcm_fips140v2.0_test.go)4
-rw-r--r--src/crypto/hpke/aead_fips140v1.0.go (renamed from src/crypto/hpke/aead_fipsv1.0.go)0
-rw-r--r--src/crypto/hpke/aead_fips140v1.26.go (renamed from src/crypto/hpke/aead_fipsv2.0.go)0
-rw-r--r--src/crypto/internal/fips140only/fips140only_test.go408
-rw-r--r--src/crypto/internal/fips140test/acvp_capabilities_fips140v1.26.json (renamed from src/crypto/internal/fips140test/acvp_capabilities_fips140v2.0.json)0
-rw-r--r--src/crypto/internal/fips140test/acvp_fips140v1.26_test.go (renamed from src/crypto/internal/fips140test/acvp_fips140v2.0_test.go)4
-rw-r--r--src/crypto/internal/fips140test/acvp_test_fips140v1.26.config.json (renamed from src/crypto/internal/fips140test/acvp_test_fips140v2.0.config.json)0
-rw-r--r--src/crypto/internal/fips140test/cast_fips140v1.0_test.go2
-rw-r--r--src/crypto/internal/fips140test/cast_fips140v1.26_test.go (renamed from src/crypto/internal/fips140test/cast_fips140v2.0_test.go)2
-rw-r--r--src/crypto/internal/fips140test/cast_test.go2
-rw-r--r--src/crypto/internal/rand/rand_fips140v1.0.go (renamed from src/crypto/internal/rand/rand_fipsv1.0.go)0
-rw-r--r--src/crypto/internal/rand/rand_fips140v1.26.go (renamed from src/crypto/internal/rand/rand_fipsv2.0.go)0
-rw-r--r--src/crypto/tls/conn.go39
-rw-r--r--src/crypto/tls/handshake_client_tls13.go22
-rw-r--r--src/crypto/tls/handshake_server_tls13.go37
-rw-r--r--src/crypto/tls/handshake_test.go140
-rw-r--r--src/crypto/tls/quic.go11
-rw-r--r--src/debug/pe/file.go18
-rw-r--r--src/encoding/gob/doc.go20
-rw-r--r--src/errors/join.go10
-rw-r--r--src/errors/join_test.go38
-rw-r--r--src/go/doc/comment_test.go12
-rw-r--r--src/go/doc/doc.go19
-rw-r--r--src/go/doc/example.go3
-rw-r--r--src/go/doc/example_test.go2
-rw-r--r--src/go/doc/testdata/pkgdoc/doc.go2
-rw-r--r--src/internal/coverage/decodemeta/decodefile.go2
-rw-r--r--src/internal/cpu/cpu_x86.go2
-rw-r--r--src/net/rpc/server.go2
-rw-r--r--src/os/exec/exec.go10
-rw-r--r--src/os/exec/exec_test.go26
-rw-r--r--src/reflect/value.go2
-rw-r--r--src/regexp/find_test.go130
-rw-r--r--src/runtime/malloc.go2
-rw-r--r--src/runtime/malloc_generated.go152
-rw-r--r--src/runtime/malloc_stubs.go2
-rw-r--r--src/runtime/mcleanup_test.go9
-rw-r--r--src/runtime/metrics_cgo_test.go25
-rw-r--r--src/runtime/mheap.go28
-rw-r--r--src/runtime/proc.go14
-rw-r--r--src/runtime/rt0_freebsd_arm64.s7
-rw-r--r--src/runtime/secret.go9
-rw-r--r--src/runtime/secret/alloc_test.go39
-rw-r--r--src/runtime/secret/doc.go15
-rw-r--r--src/runtime/secret/secret.go15
-rw-r--r--src/runtime/secret/secret_test.go38
-rw-r--r--src/runtime/secret_nosecret.go4
-rw-r--r--src/runtime/testdata/testprogcgo/notingo.go134
-rw-r--r--src/runtime/trace.go4
-rw-r--r--src/runtime/tracebuf.go2
-rw-r--r--src/simd/archsimd/_gen/simdgen/gen_simdTypes.go64
-rw-r--r--src/simd/archsimd/_gen/simdgen/gen_simdrules.go2
-rw-r--r--src/simd/archsimd/_gen/simdgen/gen_simdssa.go4
-rw-r--r--src/simd/archsimd/_gen/simdgen/godefs.go15
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml70
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml70
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml17
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml4
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml162
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml146
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml5
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml4
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml11
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml27
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml4
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml16
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml32
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml2
-rw-r--r--src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml28
-rw-r--r--src/simd/archsimd/_gen/tmplgen/main.go173
-rw-r--r--src/simd/archsimd/compare_gen_amd64.go290
-rw-r--r--src/simd/archsimd/cpu.go2
-rw-r--r--src/simd/archsimd/extra_amd64.go68
-rw-r--r--src/simd/archsimd/internal/simd_test/binary_helpers_test.go4
-rw-r--r--src/simd/archsimd/internal/simd_test/binary_test.go195
-rw-r--r--src/simd/archsimd/internal/simd_test/compare_helpers_test.go88
-rw-r--r--src/simd/archsimd/internal/simd_test/compare_test.go189
-rw-r--r--src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go4
-rw-r--r--src/simd/archsimd/internal/simd_test/generate.go3
-rw-r--r--src/simd/archsimd/internal/simd_test/helpers_test.go24
-rw-r--r--src/simd/archsimd/internal/simd_test/simd_test.go296
-rw-r--r--src/simd/archsimd/internal/simd_test/simulation_helpers_test.go178
-rw-r--r--src/simd/archsimd/internal/simd_test/ternary_helpers_test.go4
-rw-r--r--src/simd/archsimd/internal/simd_test/unary_helpers_test.go7893
-rw-r--r--src/simd/archsimd/internal/simd_test/unary_test.go221
-rw-r--r--src/simd/archsimd/maskmerge_gen_amd64.go22
-rw-r--r--src/simd/archsimd/ops_amd64.go3050
-rw-r--r--src/simd/archsimd/ops_internal_amd64.go42
-rw-r--r--src/simd/archsimd/other_gen_amd64.go218
-rw-r--r--src/simd/archsimd/shuffles_amd64.go43
-rw-r--r--src/simd/archsimd/slice_gen_amd64.go122
-rw-r--r--src/simd/archsimd/types_amd64.go398
-rw-r--r--src/simd/archsimd/unsafe_helpers.go2
-rw-r--r--src/time/export_test.go1
-rw-r--r--src/time/tick_test.go4
-rw-r--r--test/cmplxdivide.go2
-rw-r--r--test/codegen/bits.go328
-rw-r--r--test/codegen/comparisons.go4
-rw-r--r--test/codegen/simd.go31
-rw-r--r--test/fixedbugs/issue76950.go67
-rw-r--r--test/map.go2
-rw-r--r--test/stringrange.go2
-rw-r--r--test/typeparam/typelist.go2
-rw-r--r--test/uintptrescapes.dir/main.go8
143 files changed, 14940 insertions, 5317 deletions
diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go
index de3d365304..c7611ca044 100644
--- a/src/archive/tar/reader_test.go
+++ b/src/archive/tar/reader_test.go
@@ -787,7 +787,7 @@ type readBadSeeker struct{ io.ReadSeeker }
func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }
-// TestReadTruncation test the ending condition on various truncated files and
+// TestReadTruncation tests the ending condition on various truncated files and
// that truncated files are still detected even if the underlying io.Reader
// satisfies io.Seeker.
func TestReadTruncation(t *testing.T) {
diff --git a/src/archive/tar/stat_unix.go b/src/archive/tar/stat_unix.go
index f999f56db6..891a1a3b4a 100644
--- a/src/archive/tar/stat_unix.go
+++ b/src/archive/tar/stat_unix.go
@@ -19,7 +19,7 @@ func init() {
sysStat = statUnix
}
-// userMap and groupMap caches UID and GID lookups for performance reasons.
+// userMap and groupMap cache UID and GID lookups for performance reasons.
// The downside is that renaming uname or gname by the OS never takes effect.
var userMap, groupMap sync.Map // map[int]string
diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go
index 217efe9e2e..d3c28a8c4e 100644
--- a/src/archive/tar/strconv.go
+++ b/src/archive/tar/strconv.go
@@ -312,7 +312,7 @@ func formatPAXRecord(k, v string) (string, error) {
// "%d %s=%s\n" % (size, key, value)
//
// Keys and values should be UTF-8, but the number of bad writers out there
-// forces us to be a more liberal.
+// forces us to be more liberal.
// Thus, we only reject all keys with NUL, and only reject NULs in values
// for the PAX version of the USTAR string fields.
// The key must not contain an '=' character.
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index 9547ede312..891aef2c8b 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -961,7 +961,7 @@ func TestSplit(t *testing.T) {
if tt.n < 0 {
b := sliceOfString(Split([]byte(tt.s), []byte(tt.sep)))
if !slices.Equal(result, b) {
- t.Errorf("Split disagrees withSplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
+ t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
}
}
if len(a) > 0 {
@@ -1023,7 +1023,7 @@ func TestSplitAfter(t *testing.T) {
if tt.n < 0 {
b := sliceOfString(SplitAfter([]byte(tt.s), []byte(tt.sep)))
if !slices.Equal(result, b) {
- t.Errorf("SplitAfter disagrees withSplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
+ t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
}
}
}
diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go
index e53263356d..28dce50d60 100644
--- a/src/cmd/asm/internal/asm/endtoend_test.go
+++ b/src/cmd/asm/internal/asm/endtoend_test.go
@@ -199,6 +199,11 @@ Diff:
}
obj.Flushplist(ctxt, pList, nil)
+ if !ok {
+ // If we've encountered errors, the output is unlikely to be sane.
+ t.FailNow()
+ }
+
for p := top; p != nil; p = p.Link {
if p.As == obj.ATEXT {
text = p.From.Sym
@@ -486,16 +491,35 @@ func TestPPC64EndToEnd(t *testing.T) {
}
}
-func TestRISCVEndToEnd(t *testing.T) {
- testEndToEnd(t, "riscv64", "riscv64")
+func testRISCV64AllProfiles(t *testing.T, testFn func(t *testing.T)) {
+ t.Helper()
+
+ defer func(orig int) { buildcfg.GORISCV64 = orig }(buildcfg.GORISCV64)
+
+ for _, goriscv64 := range []int{20, 22, 23} {
+ t.Run(fmt.Sprintf("rva%vu64", goriscv64), func(t *testing.T) {
+ buildcfg.GORISCV64 = goriscv64
+ testFn(t)
+ })
+ }
+}
+
+func TestRISCV64EndToEnd(t *testing.T) {
+ testRISCV64AllProfiles(t, func(t *testing.T) {
+ testEndToEnd(t, "riscv64", "riscv64")
+ })
}
-func TestRISCVErrors(t *testing.T) {
- testErrors(t, "riscv64", "riscv64error")
+func TestRISCV64Errors(t *testing.T) {
+ testRISCV64AllProfiles(t, func(t *testing.T) {
+ testErrors(t, "riscv64", "riscv64error")
+ })
}
-func TestRISCVValidation(t *testing.T) {
- testErrors(t, "riscv64", "riscv64validation")
+func TestRISCV64Validation(t *testing.T) {
+ testRISCV64AllProfiles(t, func(t *testing.T) {
+ testErrors(t, "riscv64", "riscv64validation")
+ })
}
func TestS390XEndToEnd(t *testing.T) {
diff --git a/src/cmd/cgo/internal/test/issue76861.go b/src/cmd/cgo/internal/test/issue76861.go
new file mode 100644
index 0000000000..225e2acc3f
--- /dev/null
+++ b/src/cmd/cgo/internal/test/issue76861.go
@@ -0,0 +1,12 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build cgo
+
+package cgotest
+
+// Issue 43639: No runtime test needed, make sure package
+// cmd/cgo/internal/test/issue76861 compiles without error.
+
+import _ "cmd/cgo/internal/test/issue76861"
diff --git a/src/cmd/cgo/internal/test/issue76861/a.go b/src/cmd/cgo/internal/test/issue76861/a.go
new file mode 100644
index 0000000000..18a7bda490
--- /dev/null
+++ b/src/cmd/cgo/internal/test/issue76861/a.go
@@ -0,0 +1,13 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package issue76861
+
+// #cgo CFLAGS: -Wall -Werror
+// void issue76861(void) {}
+import "C"
+
+func Issue76861() {
+ C.issue76861()
+}
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index dc1e5b29e5..ac2ce8fd0d 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -783,13 +783,13 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) {
// We're trying to write a gcc struct that matches gc's layout.
// Use packed attribute to force no padding in this struct in case
// gcc has different packing requirements.
- fmt.Fprintf(fgcc, "\t%s %v *_cgo_a = v;\n", ctype, p.packedAttribute())
- if n.FuncType.Result != nil {
- // Save the stack top for use below.
- fmt.Fprintf(fgcc, "\tchar *_cgo_stktop = _cgo_topofstack();\n")
- }
tr := n.FuncType.Result
+ if (n.Kind != "macro" && len(n.FuncType.Params) > 0) || tr != nil {
+ fmt.Fprintf(fgcc, "\t%s %v *_cgo_a = v;\n", ctype, p.packedAttribute())
+ }
if tr != nil {
+ // Save the stack top for use below.
+ fmt.Fprintf(fgcc, "\tchar *_cgo_stktop = _cgo_topofstack();\n")
fmt.Fprintf(fgcc, "\t__typeof__(_cgo_a->r) _cgo_r;\n")
}
fmt.Fprintf(fgcc, "\t_cgo_tsan_acquire();\n")
@@ -819,7 +819,7 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) {
fmt.Fprintf(fgcc, "\t_cgo_errno = errno;\n")
}
fmt.Fprintf(fgcc, "\t_cgo_tsan_release();\n")
- if n.FuncType.Result != nil {
+ if tr != nil {
// The cgo call may have caused a stack copy (via a callback).
// Adjust the return value pointer appropriately.
fmt.Fprintf(fgcc, "\t_cgo_a = (void*)((char*)_cgo_a + (_cgo_topofstack() - _cgo_stktop));\n")
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index f6deba3ec1..c4d0fd69c6 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
package amd64
@@ -175,7 +175,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMOVSQD128_128,
ssa.OpAMD64VPMOVSQD128_256,
ssa.OpAMD64VPMOVSQD256,
+ ssa.OpAMD64VPMOVUSWB128_128,
+ ssa.OpAMD64VPMOVUSWB128_256,
ssa.OpAMD64VPMOVUSWB256,
+ ssa.OpAMD64VPMOVUSDB128_128,
+ ssa.OpAMD64VPMOVUSDB128_256,
+ ssa.OpAMD64VPMOVUSDB128_512,
+ ssa.OpAMD64VPMOVUSQB128_128,
+ ssa.OpAMD64VPMOVUSQB128_256,
+ ssa.OpAMD64VPMOVUSQB128_512,
ssa.OpAMD64VPMOVUSDW128_128,
ssa.OpAMD64VPMOVUSDW128_256,
ssa.OpAMD64VPMOVUSDW256,
@@ -242,12 +250,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPADDQ256,
ssa.OpAMD64VPADDQ512,
ssa.OpAMD64VHADDPS128,
- ssa.OpAMD64VHADDPS256,
ssa.OpAMD64VHADDPD128,
- ssa.OpAMD64VHADDPD256,
ssa.OpAMD64VPHADDW128,
- ssa.OpAMD64VPHADDW256,
ssa.OpAMD64VPHADDD128,
+ ssa.OpAMD64VHADDPS256,
+ ssa.OpAMD64VHADDPD256,
+ ssa.OpAMD64VPHADDW256,
ssa.OpAMD64VPHADDD256,
ssa.OpAMD64VPHADDSW128,
ssa.OpAMD64VPHADDSW256,
@@ -512,12 +520,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPSUBQ256,
ssa.OpAMD64VPSUBQ512,
ssa.OpAMD64VHSUBPS128,
- ssa.OpAMD64VHSUBPS256,
ssa.OpAMD64VHSUBPD128,
- ssa.OpAMD64VHSUBPD256,
ssa.OpAMD64VPHSUBW128,
- ssa.OpAMD64VPHSUBW256,
ssa.OpAMD64VPHSUBD128,
+ ssa.OpAMD64VHSUBPS256,
+ ssa.OpAMD64VHSUBPD256,
+ ssa.OpAMD64VPHSUBW256,
ssa.OpAMD64VPHSUBD256,
ssa.OpAMD64VPHSUBSW128,
ssa.OpAMD64VPHSUBSW256,
@@ -731,12 +739,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQMasked128,
ssa.OpAMD64VPRORVQMasked256,
ssa.OpAMD64VPRORVQMasked512,
- ssa.OpAMD64VPACKSSDWMasked128,
ssa.OpAMD64VPACKSSDWMasked256,
ssa.OpAMD64VPACKSSDWMasked512,
- ssa.OpAMD64VPACKUSDWMasked128,
+ ssa.OpAMD64VPACKSSDWMasked128,
ssa.OpAMD64VPACKUSDWMasked256,
ssa.OpAMD64VPACKUSDWMasked512,
+ ssa.OpAMD64VPACKUSDWMasked128,
ssa.OpAMD64VSCALEFPSMasked128,
ssa.OpAMD64VSCALEFPSMasked256,
ssa.OpAMD64VSCALEFPSMasked512,
@@ -1010,7 +1018,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMOVSQDMasked128_128,
ssa.OpAMD64VPMOVSQDMasked128_256,
ssa.OpAMD64VPMOVSQDMasked256,
+ ssa.OpAMD64VPMOVUSWBMasked128_128,
+ ssa.OpAMD64VPMOVUSWBMasked128_256,
ssa.OpAMD64VPMOVUSWBMasked256,
+ ssa.OpAMD64VPMOVUSDBMasked128_128,
+ ssa.OpAMD64VPMOVUSDBMasked128_256,
+ ssa.OpAMD64VPMOVUSDBMasked128_512,
+ ssa.OpAMD64VPMOVUSQBMasked128_128,
+ ssa.OpAMD64VPMOVUSQBMasked128_256,
+ ssa.OpAMD64VPMOVUSQBMasked128_512,
ssa.OpAMD64VPMOVUSDWMasked128_128,
ssa.OpAMD64VPMOVUSDWMasked128_256,
ssa.OpAMD64VPMOVUSDWMasked256,
@@ -1308,12 +1324,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2Q256,
ssa.OpAMD64VPERMI2PD512,
ssa.OpAMD64VPERMI2Q512,
- ssa.OpAMD64VPDPBUSD128,
- ssa.OpAMD64VPDPBUSD256,
- ssa.OpAMD64VPDPBUSD512,
- ssa.OpAMD64VPDPBUSDS128,
- ssa.OpAMD64VPDPBUSDS256,
- ssa.OpAMD64VPDPBUSDS512,
ssa.OpAMD64VFMADD213PS128,
ssa.OpAMD64VFMADD213PS256,
ssa.OpAMD64VFMADD213PS512,
@@ -1430,12 +1440,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMADDUBSWMasked128Merging,
ssa.OpAMD64VPMADDUBSWMasked256Merging,
ssa.OpAMD64VPMADDUBSWMasked512Merging,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked512,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked512,
ssa.OpAMD64VGF2P8MULBMasked128Merging,
ssa.OpAMD64VGF2P8MULBMasked256Merging,
ssa.OpAMD64VGF2P8MULBMasked512Merging,
@@ -1559,12 +1563,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQMasked128Merging,
ssa.OpAMD64VPRORVQMasked256Merging,
ssa.OpAMD64VPRORVQMasked512Merging,
- ssa.OpAMD64VPACKSSDWMasked128Merging,
ssa.OpAMD64VPACKSSDWMasked256Merging,
ssa.OpAMD64VPACKSSDWMasked512Merging,
- ssa.OpAMD64VPACKUSDWMasked128Merging,
+ ssa.OpAMD64VPACKSSDWMasked128Merging,
ssa.OpAMD64VPACKUSDWMasked256Merging,
ssa.OpAMD64VPACKUSDWMasked512Merging,
+ ssa.OpAMD64VPACKUSDWMasked128Merging,
ssa.OpAMD64VSCALEFPSMasked128Merging,
ssa.OpAMD64VSCALEFPSMasked256Merging,
ssa.OpAMD64VSCALEFPSMasked512Merging,
@@ -1955,8 +1959,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2Q256load,
ssa.OpAMD64VPERMI2PD512load,
ssa.OpAMD64VPERMI2Q512load,
- ssa.OpAMD64VPDPBUSD512load,
- ssa.OpAMD64VPDPBUSDS512load,
ssa.OpAMD64VFMADD213PS128load,
ssa.OpAMD64VFMADD213PS256load,
ssa.OpAMD64VFMADD213PS512load,
@@ -2004,12 +2006,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPERMI2QMasked256load,
ssa.OpAMD64VPERMI2PDMasked512load,
ssa.OpAMD64VPERMI2QMasked512load,
- ssa.OpAMD64VPDPBUSDMasked128load,
- ssa.OpAMD64VPDPBUSDMasked256load,
- ssa.OpAMD64VPDPBUSDMasked512load,
- ssa.OpAMD64VPDPBUSDSMasked128load,
- ssa.OpAMD64VPDPBUSDSMasked256load,
- ssa.OpAMD64VPDPBUSDSMasked512load,
ssa.OpAMD64VFMADD213PSMasked128load,
ssa.OpAMD64VFMADD213PSMasked256load,
ssa.OpAMD64VFMADD213PSMasked512load,
@@ -2146,12 +2142,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPRORVQMasked128load,
ssa.OpAMD64VPRORVQMasked256load,
ssa.OpAMD64VPRORVQMasked512load,
- ssa.OpAMD64VPACKSSDWMasked128load,
ssa.OpAMD64VPACKSSDWMasked256load,
ssa.OpAMD64VPACKSSDWMasked512load,
- ssa.OpAMD64VPACKUSDWMasked128load,
+ ssa.OpAMD64VPACKSSDWMasked128load,
ssa.OpAMD64VPACKUSDWMasked256load,
ssa.OpAMD64VPACKUSDWMasked512load,
+ ssa.OpAMD64VPACKUSDWMasked128load,
ssa.OpAMD64VSCALEFPSMasked128load,
ssa.OpAMD64VSCALEFPSMasked256load,
ssa.OpAMD64VSCALEFPSMasked512load,
@@ -2638,7 +2634,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMOVSQDMasked128_128Merging,
ssa.OpAMD64VPMOVSQDMasked128_256Merging,
ssa.OpAMD64VPMOVSQDMasked256Merging,
+ ssa.OpAMD64VPMOVUSWBMasked128_128Merging,
+ ssa.OpAMD64VPMOVUSWBMasked128_256Merging,
ssa.OpAMD64VPMOVUSWBMasked256Merging,
+ ssa.OpAMD64VPMOVUSDBMasked128_128Merging,
+ ssa.OpAMD64VPMOVUSDBMasked128_256Merging,
+ ssa.OpAMD64VPMOVUSDBMasked128_512Merging,
+ ssa.OpAMD64VPMOVUSQBMasked128_128Merging,
+ ssa.OpAMD64VPMOVUSQBMasked128_256Merging,
+ ssa.OpAMD64VPMOVUSQBMasked128_512Merging,
ssa.OpAMD64VPMOVUSDWMasked128_128Merging,
ssa.OpAMD64VPMOVUSDWMasked128_256Merging,
ssa.OpAMD64VPMOVUSDWMasked256Merging,
@@ -3021,18 +3025,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMADDUBSWMasked128,
ssa.OpAMD64VPMADDUBSWMasked256,
ssa.OpAMD64VPMADDUBSWMasked512,
- ssa.OpAMD64VPDPBUSDMasked128,
- ssa.OpAMD64VPDPBUSDMasked128load,
- ssa.OpAMD64VPDPBUSDMasked256,
- ssa.OpAMD64VPDPBUSDMasked256load,
- ssa.OpAMD64VPDPBUSDMasked512,
- ssa.OpAMD64VPDPBUSDMasked512load,
- ssa.OpAMD64VPDPBUSDSMasked128,
- ssa.OpAMD64VPDPBUSDSMasked128load,
- ssa.OpAMD64VPDPBUSDSMasked256,
- ssa.OpAMD64VPDPBUSDSMasked256load,
- ssa.OpAMD64VPDPBUSDSMasked512,
- ssa.OpAMD64VPDPBUSDSMasked512load,
ssa.OpAMD64VEXPANDPSMasked128,
ssa.OpAMD64VEXPANDPSMasked256,
ssa.OpAMD64VEXPANDPSMasked512,
@@ -3415,12 +3407,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMOVSQBMasked128_128,
ssa.OpAMD64VPMOVSQBMasked128_256,
ssa.OpAMD64VPMOVSQBMasked128_512,
- ssa.OpAMD64VPACKSSDWMasked128,
- ssa.OpAMD64VPACKSSDWMasked128load,
ssa.OpAMD64VPACKSSDWMasked256,
ssa.OpAMD64VPACKSSDWMasked256load,
ssa.OpAMD64VPACKSSDWMasked512,
ssa.OpAMD64VPACKSSDWMasked512load,
+ ssa.OpAMD64VPACKSSDWMasked128,
+ ssa.OpAMD64VPACKSSDWMasked128load,
ssa.OpAMD64VPMOVSDWMasked128_128,
ssa.OpAMD64VPMOVSDWMasked128_256,
ssa.OpAMD64VPMOVSDWMasked256,
@@ -3430,13 +3422,21 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPMOVSQDMasked128_128,
ssa.OpAMD64VPMOVSQDMasked128_256,
ssa.OpAMD64VPMOVSQDMasked256,
+ ssa.OpAMD64VPMOVUSWBMasked128_128,
+ ssa.OpAMD64VPMOVUSWBMasked128_256,
ssa.OpAMD64VPMOVUSWBMasked256,
- ssa.OpAMD64VPACKUSDWMasked128,
- ssa.OpAMD64VPACKUSDWMasked128load,
+ ssa.OpAMD64VPMOVUSDBMasked128_128,
+ ssa.OpAMD64VPMOVUSDBMasked128_256,
+ ssa.OpAMD64VPMOVUSDBMasked128_512,
+ ssa.OpAMD64VPMOVUSQBMasked128_128,
+ ssa.OpAMD64VPMOVUSQBMasked128_256,
+ ssa.OpAMD64VPMOVUSQBMasked128_512,
ssa.OpAMD64VPACKUSDWMasked256,
ssa.OpAMD64VPACKUSDWMasked256load,
ssa.OpAMD64VPACKUSDWMasked512,
ssa.OpAMD64VPACKUSDWMasked512load,
+ ssa.OpAMD64VPACKUSDWMasked128,
+ ssa.OpAMD64VPACKUSDWMasked128load,
ssa.OpAMD64VPMOVUSDWMasked128_128,
ssa.OpAMD64VPMOVUSDWMasked128_256,
ssa.OpAMD64VPMOVUSDWMasked256,
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 5ddcb84c59..e9a566d759 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1845,7 +1845,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpAMD64VPMOVVec32x16ToM,
ssa.OpAMD64VPMOVVec64x2ToM,
ssa.OpAMD64VPMOVVec64x4ToM,
- ssa.OpAMD64VPMOVVec64x8ToM:
+ ssa.OpAMD64VPMOVVec64x8ToM,
+ ssa.OpAMD64VPMOVMSKB128,
+ ssa.OpAMD64VPMOVMSKB256,
+ ssa.OpAMD64VMOVMSKPS128,
+ ssa.OpAMD64VMOVMSKPS256,
+ ssa.OpAMD64VMOVMSKPD128,
+ ssa.OpAMD64VMOVMSKPD256:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = simdReg(v.Args[0])
diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go
index 33f9c325c3..4fa9cf07fb 100644
--- a/src/cmd/compile/internal/inline/inl.go
+++ b/src/cmd/compile/internal/inline/inl.go
@@ -516,6 +516,9 @@ opSwitch:
break opSwitch
case "panicrangestate":
cheap = true
+ case "deferrangefunc":
+ v.reason = "defer call in range func"
+ return true
}
}
}
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 353d272179..b49e85b53c 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1679,21 +1679,21 @@
(Cvt8toMask64x8 <t> x) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x))
// masks to integers
-(CvtMask8x16to16 <t> x) => (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
-(CvtMask8x32to32 <t> x) => (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
-(CvtMask8x64to64 <t> x) => (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
+(CvtMask8x16to16 ...) => (VPMOVMSKB128 ...)
+(CvtMask8x32to32 ...) => (VPMOVMSKB256 ...)
+(CvtMask8x64to64 x) => (KMOVQi (VPMOVVec8x64ToM <types.TypeMask> x))
-(CvtMask16x8to8 <t> x) => (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
-(CvtMask16x16to16 <t> x) => (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
-(CvtMask16x32to32 <t> x) => (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
+(CvtMask16x8to8 x) => (KMOVBi (VPMOVVec16x8ToM <types.TypeMask> x))
+(CvtMask16x16to16 x) => (KMOVWi (VPMOVVec16x16ToM <types.TypeMask> x))
+(CvtMask16x32to32 x) => (KMOVDi (VPMOVVec16x32ToM <types.TypeMask> x))
-(CvtMask32x4to8 <t> x) => (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
-(CvtMask32x8to8 <t> x) => (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
-(CvtMask32x16to16 <t> x) => (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
+(CvtMask32x4to8 ...) => (VMOVMSKPS128 ...)
+(CvtMask32x8to8 ...) => (VMOVMSKPS256 ...)
+(CvtMask32x16to16 x) => (KMOVWi (VPMOVVec32x16ToM <types.TypeMask> x))
-(CvtMask64x2to8 <t> x) => (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
-(CvtMask64x4to8 <t> x) => (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
-(CvtMask64x8to8 <t> x) => (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
+(CvtMask64x2to8 ...) => (VMOVMSKPD128 ...)
+(CvtMask64x4to8 ...) => (VMOVMSKPD256 ...)
+(CvtMask64x8to8 x) => (KMOVBi (VPMOVVec64x8ToM <types.TypeMask> x))
// optimizations
(MOVBstore [off] {sym} ptr (KMOVBi mask) mem) => (KMOVBstore [off] {sym} ptr mask mem)
@@ -1730,6 +1730,13 @@
// Misc
(IsZeroVec x) => (SETEQ (VPTEST x x))
+(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x)
+(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x)
+(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
+(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x)
+(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x)
+(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
+
// SIMD vector K-masked loads and stores
(LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
@@ -1818,10 +1825,10 @@
(EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
(EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order
-// DotProductQuadruple optimizations
-(VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z) => (VPDPBUSD128 <t> z x y)
-(VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z) => (VPDPBUSD256 <t> z x y)
-(VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z) => (VPDPBUSD512 <t> z x y)
-(VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z) => (VPDPBUSDS128 <t> z x y)
-(VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z) => (VPDPBUSDS256 <t> z x y)
-(VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z) => (VPDPBUSDS512 <t> z x y) \ No newline at end of file
+// optimize x.IsNaN().Or(y.IsNaN())
+(VPOR128 (VCMPP(S|D)128 [3] x x) (VCMPP(S|D)128 [3] y y)) => (VCMPP(S|D)128 [3] x y)
+(VPOR256 (VCMPP(S|D)256 [3] x x) (VCMPP(S|D)256 [3] y y)) => (VCMPP(S|D)256 [3] x y)
+(VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y))) =>
+ (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+(VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) =>
+ (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index 2fb4fdfc96..b13eb5aa21 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -1368,6 +1368,7 @@ func init() {
{name: "VPMASK64load512", argLength: 3, reg: vloadk, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0+auxint+aux, arg1=k mask, arg2 = mem
{name: "VPMASK64store512", argLength: 4, reg: vstorek, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem
+ // AVX512 moves between int-vector and mask registers
{name: "VPMOVMToVec8x16", argLength: 1, reg: kv, asm: "VPMOVM2B"},
{name: "VPMOVMToVec8x32", argLength: 1, reg: kv, asm: "VPMOVM2B"},
{name: "VPMOVMToVec8x64", argLength: 1, reg: kw, asm: "VPMOVM2B"},
@@ -1400,6 +1401,14 @@ func init() {
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
+ // AVX1/2 moves from int-vector to bitmask (extracting sign bits)
+ {name: "VPMOVMSKB128", argLength: 1, reg: vgp, asm: "VPMOVMSKB"},
+ {name: "VPMOVMSKB256", argLength: 1, reg: vgp, asm: "VPMOVMSKB"},
+ {name: "VMOVMSKPS128", argLength: 1, reg: vgp, asm: "VMOVMSKPS"},
+ {name: "VMOVMSKPS256", argLength: 1, reg: vgp, asm: "VMOVMSKPS"},
+ {name: "VMOVMSKPD128", argLength: 1, reg: vgp, asm: "VMOVMSKPD"},
+ {name: "VMOVMSKPD256", argLength: 1, reg: vgp, asm: "VMOVMSKPD"},
+
// X15 is the zero register up to 128-bit. For larger values, we zero it on the fly.
{name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
{name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"},
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 8637133e5f..85bde1aab2 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -715,6 +715,14 @@ var genericOps = []opData{
// Returns true if arg0 is all zero.
{name: "IsZeroVec", argLength: 1},
+
+ // Returns a mask indicating whether arg0's elements are NaN.
+ {name: "IsNaNFloat32x4", argLength: 1},
+ {name: "IsNaNFloat32x8", argLength: 1},
+ {name: "IsNaNFloat32x16", argLength: 1},
+ {name: "IsNaNFloat64x2", argLength: 1},
+ {name: "IsNaNFloat64x4", argLength: 1},
+ {name: "IsNaNFloat64x8", argLength: 1},
}
// kind controls successors implicit exit
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 649940497c..5c83f39a1f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
(AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...)
(AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...)
@@ -57,19 +57,19 @@
(AddUint64x4 ...) => (VPADDQ256 ...)
(AddUint64x8 ...) => (VPADDQ512 ...)
(AddPairsFloat32x4 ...) => (VHADDPS128 ...)
-(AddPairsFloat32x8 ...) => (VHADDPS256 ...)
(AddPairsFloat64x2 ...) => (VHADDPD128 ...)
-(AddPairsFloat64x4 ...) => (VHADDPD256 ...)
(AddPairsInt16x8 ...) => (VPHADDW128 ...)
-(AddPairsInt16x16 ...) => (VPHADDW256 ...)
(AddPairsInt32x4 ...) => (VPHADDD128 ...)
-(AddPairsInt32x8 ...) => (VPHADDD256 ...)
(AddPairsUint16x8 ...) => (VPHADDW128 ...)
-(AddPairsUint16x16 ...) => (VPHADDW256 ...)
(AddPairsUint32x4 ...) => (VPHADDD128 ...)
-(AddPairsUint32x8 ...) => (VPHADDD256 ...)
+(AddPairsGroupedFloat32x8 ...) => (VHADDPS256 ...)
+(AddPairsGroupedFloat64x4 ...) => (VHADDPD256 ...)
+(AddPairsGroupedInt16x16 ...) => (VPHADDW256 ...)
+(AddPairsGroupedInt32x8 ...) => (VPHADDD256 ...)
+(AddPairsGroupedUint16x16 ...) => (VPHADDW256 ...)
+(AddPairsGroupedUint32x8 ...) => (VPHADDD256 ...)
(AddPairsSaturatedInt16x8 ...) => (VPHADDSW128 ...)
-(AddPairsSaturatedInt16x16 ...) => (VPHADDSW256 ...)
+(AddPairsSaturatedGroupedInt16x16 ...) => (VPHADDSW256 ...)
(AddSaturatedInt8x16 ...) => (VPADDSB128 ...)
(AddSaturatedInt8x32 ...) => (VPADDSB256 ...)
(AddSaturatedInt8x64 ...) => (VPADDSB512 ...)
@@ -316,12 +316,6 @@
(DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
(DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
(DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
-(DotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
-(DotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
-(DotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(DotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
-(DotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
-(DotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
(EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
(EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
(EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
@@ -382,26 +376,26 @@
(ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ExtendLo2ToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...)
-(ExtendLo2ToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...)
-(ExtendLo2ToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
-(ExtendLo2ToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...)
-(ExtendLo2ToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...)
-(ExtendLo2ToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...)
-(ExtendLo4ToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
-(ExtendLo4ToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
-(ExtendLo4ToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
-(ExtendLo4ToInt64x4Int16x8 ...) => (VPMOVSXWQ256 ...)
-(ExtendLo4ToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
-(ExtendLo4ToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
-(ExtendLo4ToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...)
-(ExtendLo4ToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...)
-(ExtendLo8ToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
-(ExtendLo8ToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...)
-(ExtendLo8ToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
-(ExtendLo8ToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
-(ExtendLo8ToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...)
-(ExtendLo8ToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...)
+(ExtendLo2ToInt64Int8x16 ...) => (VPMOVSXBQ128 ...)
+(ExtendLo2ToInt64Int16x8 ...) => (VPMOVSXWQ128 ...)
+(ExtendLo2ToInt64Int32x4 ...) => (VPMOVSXDQ128 ...)
+(ExtendLo2ToUint64Uint8x16 ...) => (VPMOVZXBQ128 ...)
+(ExtendLo2ToUint64Uint16x8 ...) => (VPMOVZXWQ128 ...)
+(ExtendLo2ToUint64Uint32x4 ...) => (VPMOVZXDQ128 ...)
+(ExtendLo4ToInt32Int8x16 ...) => (VPMOVSXBD128 ...)
+(ExtendLo4ToInt32Int16x8 ...) => (VPMOVSXWD128 ...)
+(ExtendLo4ToInt64Int8x16 ...) => (VPMOVSXBQ256 ...)
+(ExtendLo4ToInt64Int16x8 ...) => (VPMOVSXWQ256 ...)
+(ExtendLo4ToUint32Uint8x16 ...) => (VPMOVZXBD128 ...)
+(ExtendLo4ToUint32Uint16x8 ...) => (VPMOVZXWD128 ...)
+(ExtendLo4ToUint64Uint8x16 ...) => (VPMOVZXBQ256 ...)
+(ExtendLo4ToUint64Uint16x8 ...) => (VPMOVZXWQ256 ...)
+(ExtendLo8ToInt16Int8x16 ...) => (VPMOVSXBW128 ...)
+(ExtendLo8ToInt32Int8x16 ...) => (VPMOVSXBD256 ...)
+(ExtendLo8ToInt64Int8x16 ...) => (VPMOVSXBQ512 ...)
+(ExtendLo8ToUint16Uint8x16 ...) => (VPMOVZXBW128 ...)
+(ExtendLo8ToUint32Uint8x16 ...) => (VPMOVZXBD256 ...)
+(ExtendLo8ToUint64Uint8x16 ...) => (VPMOVZXBQ512 ...)
(ExtendToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
(ExtendToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
(ExtendToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
@@ -565,12 +559,6 @@
(InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...)
(InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...)
(InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...)
-(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
-(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
-(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
-(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
-(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
(LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
(LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
(LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
@@ -914,29 +902,29 @@
(SaturateToInt16Int64x4 ...) => (VPMOVSQW128_256 ...)
(SaturateToInt16Int64x8 ...) => (VPMOVSQW128_512 ...)
(SaturateToInt16ConcatInt32x4 ...) => (VPACKSSDW128 ...)
-(SaturateToInt16ConcatInt32x8 ...) => (VPACKSSDW256 ...)
-(SaturateToInt16ConcatInt32x16 ...) => (VPACKSSDW512 ...)
+(SaturateToInt16ConcatGroupedInt32x8 ...) => (VPACKSSDW256 ...)
+(SaturateToInt16ConcatGroupedInt32x16 ...) => (VPACKSSDW512 ...)
(SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...)
(SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...)
(SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...)
-(SaturateToUint8Int16x8 ...) => (VPMOVSWB128_128 ...)
-(SaturateToUint8Int16x16 ...) => (VPMOVSWB128_256 ...)
-(SaturateToUint8Int32x4 ...) => (VPMOVSDB128_128 ...)
-(SaturateToUint8Int32x8 ...) => (VPMOVSDB128_256 ...)
-(SaturateToUint8Int32x16 ...) => (VPMOVSDB128_512 ...)
-(SaturateToUint8Int64x2 ...) => (VPMOVSQB128_128 ...)
-(SaturateToUint8Int64x4 ...) => (VPMOVSQB128_256 ...)
-(SaturateToUint8Int64x8 ...) => (VPMOVSQB128_512 ...)
+(SaturateToUint8Uint16x8 ...) => (VPMOVUSWB128_128 ...)
+(SaturateToUint8Uint16x16 ...) => (VPMOVUSWB128_256 ...)
(SaturateToUint8Uint16x32 ...) => (VPMOVUSWB256 ...)
+(SaturateToUint8Uint32x4 ...) => (VPMOVUSDB128_128 ...)
+(SaturateToUint8Uint32x8 ...) => (VPMOVUSDB128_256 ...)
+(SaturateToUint8Uint32x16 ...) => (VPMOVUSDB128_512 ...)
+(SaturateToUint8Uint64x2 ...) => (VPMOVUSQB128_128 ...)
+(SaturateToUint8Uint64x4 ...) => (VPMOVUSQB128_256 ...)
+(SaturateToUint8Uint64x8 ...) => (VPMOVUSQB128_512 ...)
(SaturateToUint16Uint32x4 ...) => (VPMOVUSDW128_128 ...)
(SaturateToUint16Uint32x8 ...) => (VPMOVUSDW128_256 ...)
(SaturateToUint16Uint32x16 ...) => (VPMOVUSDW256 ...)
(SaturateToUint16Uint64x2 ...) => (VPMOVUSQW128_128 ...)
(SaturateToUint16Uint64x4 ...) => (VPMOVUSQW128_256 ...)
(SaturateToUint16Uint64x8 ...) => (VPMOVUSQW128_512 ...)
-(SaturateToUint16ConcatUint32x4 ...) => (VPACKUSDW128 ...)
-(SaturateToUint16ConcatUint32x8 ...) => (VPACKUSDW256 ...)
-(SaturateToUint16ConcatUint32x16 ...) => (VPACKUSDW512 ...)
+(SaturateToUint16ConcatInt32x4 ...) => (VPACKUSDW128 ...)
+(SaturateToUint16ConcatGroupedInt32x8 ...) => (VPACKUSDW256 ...)
+(SaturateToUint16ConcatGroupedInt32x16 ...) => (VPACKUSDW512 ...)
(SaturateToUint32Uint64x2 ...) => (VPMOVUSQD128_128 ...)
(SaturateToUint32Uint64x4 ...) => (VPMOVUSQD128_256 ...)
(SaturateToUint32Uint64x8 ...) => (VPMOVUSQD256 ...)
@@ -1223,19 +1211,19 @@
(SubUint64x4 ...) => (VPSUBQ256 ...)
(SubUint64x8 ...) => (VPSUBQ512 ...)
(SubPairsFloat32x4 ...) => (VHSUBPS128 ...)
-(SubPairsFloat32x8 ...) => (VHSUBPS256 ...)
(SubPairsFloat64x2 ...) => (VHSUBPD128 ...)
-(SubPairsFloat64x4 ...) => (VHSUBPD256 ...)
(SubPairsInt16x8 ...) => (VPHSUBW128 ...)
-(SubPairsInt16x16 ...) => (VPHSUBW256 ...)
(SubPairsInt32x4 ...) => (VPHSUBD128 ...)
-(SubPairsInt32x8 ...) => (VPHSUBD256 ...)
(SubPairsUint16x8 ...) => (VPHSUBW128 ...)
-(SubPairsUint16x16 ...) => (VPHSUBW256 ...)
(SubPairsUint32x4 ...) => (VPHSUBD128 ...)
-(SubPairsUint32x8 ...) => (VPHSUBD256 ...)
+(SubPairsGroupedFloat32x8 ...) => (VHSUBPS256 ...)
+(SubPairsGroupedFloat64x4 ...) => (VHSUBPD256 ...)
+(SubPairsGroupedInt16x16 ...) => (VPHSUBW256 ...)
+(SubPairsGroupedInt32x8 ...) => (VPHSUBD256 ...)
+(SubPairsGroupedUint16x16 ...) => (VPHSUBW256 ...)
+(SubPairsGroupedUint32x8 ...) => (VPHSUBD256 ...)
(SubPairsSaturatedInt16x8 ...) => (VPHSUBSW128 ...)
-(SubPairsSaturatedInt16x16 ...) => (VPHSUBSW256 ...)
+(SubPairsSaturatedGroupedInt16x16 ...) => (VPHSUBSW256 ...)
(SubSaturatedInt8x16 ...) => (VPSUBSB128 ...)
(SubSaturatedInt8x32 ...) => (VPSUBSB256 ...)
(SubSaturatedInt8x64 ...) => (VPSUBSB512 ...)
@@ -1547,12 +1535,6 @@
(VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) => (VPMADDUBSWMasked128 x y mask)
(VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) => (VPMADDUBSWMasked256 x y mask)
(VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512 x y mask)
-(VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) => (VPDPBUSDMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) => (VPDPBUSDMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask)
-(VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask)
-(VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask)
-(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask)
(VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask)
(VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask)
(VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask)
@@ -1775,9 +1757,9 @@
(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask)
(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask)
(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask)
-(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask)
(VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask)
(VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask)
+(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask)
(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask)
(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask)
(VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask)
@@ -1787,10 +1769,18 @@
(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask)
(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask)
(VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask)
+(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask)
+(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask)
(VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask)
-(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
+(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask)
+(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask)
+(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask)
(VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask)
(VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
+(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask)
(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask)
(VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask)
@@ -2018,6 +2008,7 @@
(VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
(VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask)
(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask)
(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
@@ -2071,6 +2062,7 @@
(VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask)
(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
@@ -2235,9 +2227,12 @@
(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
@@ -2396,9 +2391,12 @@
(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
@@ -2511,30 +2509,30 @@
(VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem)
(VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem)
(VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem)
-(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
@@ -2655,54 +2653,46 @@
(VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
(VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
(VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
-(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
-(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
-(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem)
-(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
-(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
(VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
(VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
-(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem)
(VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem)
-(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
(VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem)
(VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem)
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
@@ -2883,30 +2873,30 @@
(VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked128load {sym} [off] ptr mask mem)
(VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked256load {sym} [off] ptr mask mem)
(VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked512load {sym} [off] ptr mask mem)
-(VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+(VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
(VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD128load {sym} [off] x ptr mem)
(VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD256load {sym} [off] x ptr mem)
(VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD512load {sym} [off] x ptr mem)
@@ -2932,13 +2922,13 @@
(VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem)
(VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem)
(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
-(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem)
+(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem)
-(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem)
(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem)
+(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
(VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem)
(VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem)
(VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem)
@@ -2951,30 +2941,30 @@
(VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem)
(VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem)
-(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
-(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
+(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
(VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem)
(VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem)
(VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} [off] x y ptr mem)
@@ -3059,41 +3049,41 @@
(VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem)
(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem)
-(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
-(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
-(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
-(VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
-(VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
-(VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
-(VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
-(VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
-(VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
+(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
+(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
+(VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
+(VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
+(VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
+(VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
+(VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
+(VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index f38d24fde7..648e372fb4 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
package main
@@ -452,18 +452,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VPDPBUSD128", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSD256", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSD512", argLength: 3, reg: w31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPDPWSSD128", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPDPWSSD256", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPDPWSSD512", argLength: 3, reg: w31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
@@ -780,12 +768,24 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -798,7 +798,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -1698,14 +1702,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
{name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false},
- {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
- {name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
{name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
@@ -2382,15 +2378,23 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
+ {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index a68d8c4122..889ab0d84f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
package main
@@ -48,19 +48,19 @@ func simdGenericOps() []opData {
{name: "AddInt64x4", argLength: 2, commutative: true},
{name: "AddInt64x8", argLength: 2, commutative: true},
{name: "AddPairsFloat32x4", argLength: 2, commutative: false},
- {name: "AddPairsFloat32x8", argLength: 2, commutative: false},
{name: "AddPairsFloat64x2", argLength: 2, commutative: false},
- {name: "AddPairsFloat64x4", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedFloat32x8", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedFloat64x4", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedInt16x16", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedInt32x8", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedUint16x16", argLength: 2, commutative: false},
+ {name: "AddPairsGroupedUint32x8", argLength: 2, commutative: false},
{name: "AddPairsInt16x8", argLength: 2, commutative: false},
- {name: "AddPairsInt16x16", argLength: 2, commutative: false},
{name: "AddPairsInt32x4", argLength: 2, commutative: false},
- {name: "AddPairsInt32x8", argLength: 2, commutative: false},
+ {name: "AddPairsSaturatedGroupedInt16x16", argLength: 2, commutative: false},
{name: "AddPairsSaturatedInt16x8", argLength: 2, commutative: false},
- {name: "AddPairsSaturatedInt16x16", argLength: 2, commutative: false},
{name: "AddPairsUint16x8", argLength: 2, commutative: false},
- {name: "AddPairsUint16x16", argLength: 2, commutative: false},
{name: "AddPairsUint32x4", argLength: 2, commutative: false},
- {name: "AddPairsUint32x8", argLength: 2, commutative: false},
{name: "AddSaturatedInt8x16", argLength: 2, commutative: true},
{name: "AddSaturatedInt8x32", argLength: 2, commutative: true},
{name: "AddSaturatedInt8x64", argLength: 2, commutative: true},
@@ -304,12 +304,6 @@ func simdGenericOps() []opData {
{name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false},
{name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false},
{name: "DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false},
- {name: "DotProductQuadrupleInt32x4", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleInt32x8", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleInt32x16", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
- {name: "DotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
{name: "EqualFloat32x4", argLength: 2, commutative: true},
{name: "EqualFloat32x8", argLength: 2, commutative: true},
{name: "EqualFloat32x16", argLength: 2, commutative: true},
@@ -370,26 +364,26 @@ func simdGenericOps() []opData {
{name: "ExpandUint64x2", argLength: 2, commutative: false},
{name: "ExpandUint64x4", argLength: 2, commutative: false},
{name: "ExpandUint64x8", argLength: 2, commutative: false},
- {name: "ExtendLo2ToInt64x2Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo2ToInt64x2Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo2ToInt64x2Int32x4", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint32x4", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt32x4Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt32x4Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt64x4Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt64x4Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint32x4Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint32x4Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint64x4Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint64x4Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt16x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt32x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt64x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint16x8Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint32x8Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint64x8Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int32x4", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint32x4", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt32Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt32Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt64Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint32Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint32Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint64Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint64Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt16Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt32Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint16Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint32Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint64Uint8x16", argLength: 1, commutative: false},
{name: "ExtendToInt16Int8x16", argLength: 1, commutative: false},
{name: "ExtendToInt16Int8x32", argLength: 1, commutative: false},
{name: "ExtendToInt32Int8x16", argLength: 1, commutative: false},
@@ -525,12 +519,6 @@ func simdGenericOps() []opData {
{name: "InterleaveLoUint16x8", argLength: 2, commutative: false},
{name: "InterleaveLoUint32x4", argLength: 2, commutative: false},
{name: "InterleaveLoUint64x2", argLength: 2, commutative: false},
- {name: "IsNanFloat32x4", argLength: 2, commutative: true},
- {name: "IsNanFloat32x8", argLength: 2, commutative: true},
- {name: "IsNanFloat32x16", argLength: 2, commutative: true},
- {name: "IsNanFloat64x2", argLength: 2, commutative: true},
- {name: "IsNanFloat64x4", argLength: 2, commutative: true},
- {name: "IsNanFloat64x8", argLength: 2, commutative: true},
{name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
@@ -830,9 +818,9 @@ func simdGenericOps() []opData {
{name: "SaturateToInt8Int64x2", argLength: 1, commutative: false},
{name: "SaturateToInt8Int64x4", argLength: 1, commutative: false},
{name: "SaturateToInt8Int64x8", argLength: 1, commutative: false},
+ {name: "SaturateToInt16ConcatGroupedInt32x8", argLength: 2, commutative: false},
+ {name: "SaturateToInt16ConcatGroupedInt32x16", argLength: 2, commutative: false},
{name: "SaturateToInt16ConcatInt32x4", argLength: 2, commutative: false},
- {name: "SaturateToInt16ConcatInt32x8", argLength: 2, commutative: false},
- {name: "SaturateToInt16ConcatInt32x16", argLength: 2, commutative: false},
{name: "SaturateToInt16Int32x4", argLength: 1, commutative: false},
{name: "SaturateToInt16Int32x8", argLength: 1, commutative: false},
{name: "SaturateToInt16Int32x16", argLength: 1, commutative: false},
@@ -842,18 +830,18 @@ func simdGenericOps() []opData {
{name: "SaturateToInt32Int64x2", argLength: 1, commutative: false},
{name: "SaturateToInt32Int64x4", argLength: 1, commutative: false},
{name: "SaturateToInt32Int64x8", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int16x8", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int16x16", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int32x4", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int32x8", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int32x16", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int64x2", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int64x4", argLength: 1, commutative: false},
- {name: "SaturateToUint8Int64x8", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint16x8", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint16x16", argLength: 1, commutative: false},
{name: "SaturateToUint8Uint16x32", argLength: 1, commutative: false},
- {name: "SaturateToUint16ConcatUint32x4", argLength: 2, commutative: false},
- {name: "SaturateToUint16ConcatUint32x8", argLength: 2, commutative: false},
- {name: "SaturateToUint16ConcatUint32x16", argLength: 2, commutative: false},
+ {name: "SaturateToUint8Uint32x4", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint32x8", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint32x16", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint64x2", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint64x4", argLength: 1, commutative: false},
+ {name: "SaturateToUint8Uint64x8", argLength: 1, commutative: false},
+ {name: "SaturateToUint16ConcatGroupedInt32x8", argLength: 2, commutative: false},
+ {name: "SaturateToUint16ConcatGroupedInt32x16", argLength: 2, commutative: false},
+ {name: "SaturateToUint16ConcatInt32x4", argLength: 2, commutative: false},
{name: "SaturateToUint16Uint32x4", argLength: 1, commutative: false},
{name: "SaturateToUint16Uint32x8", argLength: 1, commutative: false},
{name: "SaturateToUint16Uint32x16", argLength: 1, commutative: false},
@@ -1042,19 +1030,19 @@ func simdGenericOps() []opData {
{name: "SubInt64x4", argLength: 2, commutative: false},
{name: "SubInt64x8", argLength: 2, commutative: false},
{name: "SubPairsFloat32x4", argLength: 2, commutative: false},
- {name: "SubPairsFloat32x8", argLength: 2, commutative: false},
{name: "SubPairsFloat64x2", argLength: 2, commutative: false},
- {name: "SubPairsFloat64x4", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedFloat32x8", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedFloat64x4", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedInt16x16", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedInt32x8", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedUint16x16", argLength: 2, commutative: false},
+ {name: "SubPairsGroupedUint32x8", argLength: 2, commutative: false},
{name: "SubPairsInt16x8", argLength: 2, commutative: false},
- {name: "SubPairsInt16x16", argLength: 2, commutative: false},
{name: "SubPairsInt32x4", argLength: 2, commutative: false},
- {name: "SubPairsInt32x8", argLength: 2, commutative: false},
+ {name: "SubPairsSaturatedGroupedInt16x16", argLength: 2, commutative: false},
{name: "SubPairsSaturatedInt16x8", argLength: 2, commutative: false},
- {name: "SubPairsSaturatedInt16x16", argLength: 2, commutative: false},
{name: "SubPairsUint16x8", argLength: 2, commutative: false},
- {name: "SubPairsUint16x16", argLength: 2, commutative: false},
{name: "SubPairsUint32x4", argLength: 2, commutative: false},
- {name: "SubPairsUint32x8", argLength: 2, commutative: false},
{name: "SubSaturatedInt8x16", argLength: 2, commutative: false},
{name: "SubSaturatedInt8x32", argLength: 2, commutative: false},
{name: "SubSaturatedInt8x64", argLength: 2, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 00d581ec9a..7b70dc2686 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1214,6 +1214,12 @@ const (
OpAMD64VPMOVVec64x2ToM
OpAMD64VPMOVVec64x4ToM
OpAMD64VPMOVVec64x8ToM
+ OpAMD64VPMOVMSKB128
+ OpAMD64VPMOVMSKB256
+ OpAMD64VMOVMSKPS128
+ OpAMD64VMOVMSKPS256
+ OpAMD64VMOVMSKPD128
+ OpAMD64VMOVMSKPD256
OpAMD64Zero128
OpAMD64Zero256
OpAMD64Zero512
@@ -1693,18 +1699,6 @@ const (
OpAMD64VPCOMPRESSWMasked128
OpAMD64VPCOMPRESSWMasked256
OpAMD64VPCOMPRESSWMasked512
- OpAMD64VPDPBUSD128
- OpAMD64VPDPBUSD256
- OpAMD64VPDPBUSD512
- OpAMD64VPDPBUSDMasked128
- OpAMD64VPDPBUSDMasked256
- OpAMD64VPDPBUSDMasked512
- OpAMD64VPDPBUSDS128
- OpAMD64VPDPBUSDS256
- OpAMD64VPDPBUSDS512
- OpAMD64VPDPBUSDSMasked128
- OpAMD64VPDPBUSDSMasked256
- OpAMD64VPDPBUSDSMasked512
OpAMD64VPDPWSSD128
OpAMD64VPDPWSSD256
OpAMD64VPDPWSSD512
@@ -2021,12 +2015,24 @@ const (
OpAMD64VPMOVSXWQMasked128
OpAMD64VPMOVSXWQMasked256
OpAMD64VPMOVSXWQMasked512
+ OpAMD64VPMOVUSDB128_128
+ OpAMD64VPMOVUSDB128_256
+ OpAMD64VPMOVUSDB128_512
+ OpAMD64VPMOVUSDBMasked128_128
+ OpAMD64VPMOVUSDBMasked128_256
+ OpAMD64VPMOVUSDBMasked128_512
OpAMD64VPMOVUSDW128_128
OpAMD64VPMOVUSDW128_256
OpAMD64VPMOVUSDW256
OpAMD64VPMOVUSDWMasked128_128
OpAMD64VPMOVUSDWMasked128_256
OpAMD64VPMOVUSDWMasked256
+ OpAMD64VPMOVUSQB128_128
+ OpAMD64VPMOVUSQB128_256
+ OpAMD64VPMOVUSQB128_512
+ OpAMD64VPMOVUSQBMasked128_128
+ OpAMD64VPMOVUSQBMasked128_256
+ OpAMD64VPMOVUSQBMasked128_512
OpAMD64VPMOVUSQD128_128
OpAMD64VPMOVUSQD128_256
OpAMD64VPMOVUSQD256
@@ -2039,7 +2045,11 @@ const (
OpAMD64VPMOVUSQWMasked128_128
OpAMD64VPMOVUSQWMasked128_256
OpAMD64VPMOVUSQWMasked128_512
+ OpAMD64VPMOVUSWB128_128
+ OpAMD64VPMOVUSWB128_256
OpAMD64VPMOVUSWB256
+ OpAMD64VPMOVUSWBMasked128_128
+ OpAMD64VPMOVUSWBMasked128_256
OpAMD64VPMOVUSWBMasked256
OpAMD64VPMOVWB128_128
OpAMD64VPMOVWB128_256
@@ -2939,14 +2949,6 @@ const (
OpAMD64VPCMPEQQ512load
OpAMD64VPCMPGTD512load
OpAMD64VPCMPGTQ512load
- OpAMD64VPDPBUSD512load
- OpAMD64VPDPBUSDMasked128load
- OpAMD64VPDPBUSDMasked256load
- OpAMD64VPDPBUSDMasked512load
- OpAMD64VPDPBUSDS512load
- OpAMD64VPDPBUSDSMasked128load
- OpAMD64VPDPBUSDSMasked256load
- OpAMD64VPDPBUSDSMasked512load
OpAMD64VPDPWSSD512load
OpAMD64VPDPWSSDMasked128load
OpAMD64VPDPWSSDMasked256load
@@ -3623,15 +3625,23 @@ const (
OpAMD64VPMOVSXWQMasked128Merging
OpAMD64VPMOVSXWQMasked256Merging
OpAMD64VPMOVSXWQMasked512Merging
+ OpAMD64VPMOVUSDBMasked128_128Merging
+ OpAMD64VPMOVUSDBMasked128_256Merging
+ OpAMD64VPMOVUSDBMasked128_512Merging
OpAMD64VPMOVUSDWMasked128_128Merging
OpAMD64VPMOVUSDWMasked128_256Merging
OpAMD64VPMOVUSDWMasked256Merging
+ OpAMD64VPMOVUSQBMasked128_128Merging
+ OpAMD64VPMOVUSQBMasked128_256Merging
+ OpAMD64VPMOVUSQBMasked128_512Merging
OpAMD64VPMOVUSQDMasked128_128Merging
OpAMD64VPMOVUSQDMasked128_256Merging
OpAMD64VPMOVUSQDMasked256Merging
OpAMD64VPMOVUSQWMasked128_128Merging
OpAMD64VPMOVUSQWMasked128_256Merging
OpAMD64VPMOVUSQWMasked128_512Merging
+ OpAMD64VPMOVUSWBMasked128_128Merging
+ OpAMD64VPMOVUSWBMasked128_256Merging
OpAMD64VPMOVUSWBMasked256Merging
OpAMD64VPMOVWBMasked128_128Merging
OpAMD64VPMOVWBMasked128_256Merging
@@ -6154,6 +6164,12 @@ const (
OpCvtMask64x4to8
OpCvtMask64x8to8
OpIsZeroVec
+ OpIsNaNFloat32x4
+ OpIsNaNFloat32x8
+ OpIsNaNFloat32x16
+ OpIsNaNFloat64x2
+ OpIsNaNFloat64x4
+ OpIsNaNFloat64x8
OpAESDecryptLastRoundUint8x16
OpAESDecryptLastRoundUint8x32
OpAESDecryptLastRoundUint8x64
@@ -6198,19 +6214,19 @@ const (
OpAddInt64x4
OpAddInt64x8
OpAddPairsFloat32x4
- OpAddPairsFloat32x8
OpAddPairsFloat64x2
- OpAddPairsFloat64x4
+ OpAddPairsGroupedFloat32x8
+ OpAddPairsGroupedFloat64x4
+ OpAddPairsGroupedInt16x16
+ OpAddPairsGroupedInt32x8
+ OpAddPairsGroupedUint16x16
+ OpAddPairsGroupedUint32x8
OpAddPairsInt16x8
- OpAddPairsInt16x16
OpAddPairsInt32x4
- OpAddPairsInt32x8
+ OpAddPairsSaturatedGroupedInt16x16
OpAddPairsSaturatedInt16x8
- OpAddPairsSaturatedInt16x16
OpAddPairsUint16x8
- OpAddPairsUint16x16
OpAddPairsUint32x4
- OpAddPairsUint32x8
OpAddSaturatedInt8x16
OpAddSaturatedInt8x32
OpAddSaturatedInt8x64
@@ -6454,12 +6470,6 @@ const (
OpDotProductPairsSaturatedUint8x16
OpDotProductPairsSaturatedUint8x32
OpDotProductPairsSaturatedUint8x64
- OpDotProductQuadrupleInt32x4
- OpDotProductQuadrupleInt32x8
- OpDotProductQuadrupleInt32x16
- OpDotProductQuadrupleSaturatedInt32x4
- OpDotProductQuadrupleSaturatedInt32x8
- OpDotProductQuadrupleSaturatedInt32x16
OpEqualFloat32x4
OpEqualFloat32x8
OpEqualFloat32x16
@@ -6520,26 +6530,26 @@ const (
OpExpandUint64x2
OpExpandUint64x4
OpExpandUint64x8
- OpExtendLo2ToInt64x2Int8x16
- OpExtendLo2ToInt64x2Int16x8
- OpExtendLo2ToInt64x2Int32x4
- OpExtendLo2ToUint64x2Uint8x16
- OpExtendLo2ToUint64x2Uint16x8
- OpExtendLo2ToUint64x2Uint32x4
- OpExtendLo4ToInt32x4Int8x16
- OpExtendLo4ToInt32x4Int16x8
- OpExtendLo4ToInt64x4Int8x16
- OpExtendLo4ToInt64x4Int16x8
- OpExtendLo4ToUint32x4Uint8x16
- OpExtendLo4ToUint32x4Uint16x8
- OpExtendLo4ToUint64x4Uint8x16
- OpExtendLo4ToUint64x4Uint16x8
- OpExtendLo8ToInt16x8Int8x16
- OpExtendLo8ToInt32x8Int8x16
- OpExtendLo8ToInt64x8Int8x16
- OpExtendLo8ToUint16x8Uint8x16
- OpExtendLo8ToUint32x8Uint8x16
- OpExtendLo8ToUint64x8Uint8x16
+ OpExtendLo2ToInt64Int8x16
+ OpExtendLo2ToInt64Int16x8
+ OpExtendLo2ToInt64Int32x4
+ OpExtendLo2ToUint64Uint8x16
+ OpExtendLo2ToUint64Uint16x8
+ OpExtendLo2ToUint64Uint32x4
+ OpExtendLo4ToInt32Int8x16
+ OpExtendLo4ToInt32Int16x8
+ OpExtendLo4ToInt64Int8x16
+ OpExtendLo4ToInt64Int16x8
+ OpExtendLo4ToUint32Uint8x16
+ OpExtendLo4ToUint32Uint16x8
+ OpExtendLo4ToUint64Uint8x16
+ OpExtendLo4ToUint64Uint16x8
+ OpExtendLo8ToInt16Int8x16
+ OpExtendLo8ToInt32Int8x16
+ OpExtendLo8ToInt64Int8x16
+ OpExtendLo8ToUint16Uint8x16
+ OpExtendLo8ToUint32Uint8x16
+ OpExtendLo8ToUint64Uint8x16
OpExtendToInt16Int8x16
OpExtendToInt16Int8x32
OpExtendToInt32Int8x16
@@ -6675,12 +6685,6 @@ const (
OpInterleaveLoUint16x8
OpInterleaveLoUint32x4
OpInterleaveLoUint64x2
- OpIsNanFloat32x4
- OpIsNanFloat32x8
- OpIsNanFloat32x16
- OpIsNanFloat64x2
- OpIsNanFloat64x4
- OpIsNanFloat64x8
OpLeadingZerosInt32x4
OpLeadingZerosInt32x8
OpLeadingZerosInt32x16
@@ -6980,9 +6984,9 @@ const (
OpSaturateToInt8Int64x2
OpSaturateToInt8Int64x4
OpSaturateToInt8Int64x8
+ OpSaturateToInt16ConcatGroupedInt32x8
+ OpSaturateToInt16ConcatGroupedInt32x16
OpSaturateToInt16ConcatInt32x4
- OpSaturateToInt16ConcatInt32x8
- OpSaturateToInt16ConcatInt32x16
OpSaturateToInt16Int32x4
OpSaturateToInt16Int32x8
OpSaturateToInt16Int32x16
@@ -6992,18 +6996,18 @@ const (
OpSaturateToInt32Int64x2
OpSaturateToInt32Int64x4
OpSaturateToInt32Int64x8
- OpSaturateToUint8Int16x8
- OpSaturateToUint8Int16x16
- OpSaturateToUint8Int32x4
- OpSaturateToUint8Int32x8
- OpSaturateToUint8Int32x16
- OpSaturateToUint8Int64x2
- OpSaturateToUint8Int64x4
- OpSaturateToUint8Int64x8
+ OpSaturateToUint8Uint16x8
+ OpSaturateToUint8Uint16x16
OpSaturateToUint8Uint16x32
- OpSaturateToUint16ConcatUint32x4
- OpSaturateToUint16ConcatUint32x8
- OpSaturateToUint16ConcatUint32x16
+ OpSaturateToUint8Uint32x4
+ OpSaturateToUint8Uint32x8
+ OpSaturateToUint8Uint32x16
+ OpSaturateToUint8Uint64x2
+ OpSaturateToUint8Uint64x4
+ OpSaturateToUint8Uint64x8
+ OpSaturateToUint16ConcatGroupedInt32x8
+ OpSaturateToUint16ConcatGroupedInt32x16
+ OpSaturateToUint16ConcatInt32x4
OpSaturateToUint16Uint32x4
OpSaturateToUint16Uint32x8
OpSaturateToUint16Uint32x16
@@ -7192,19 +7196,19 @@ const (
OpSubInt64x4
OpSubInt64x8
OpSubPairsFloat32x4
- OpSubPairsFloat32x8
OpSubPairsFloat64x2
- OpSubPairsFloat64x4
+ OpSubPairsGroupedFloat32x8
+ OpSubPairsGroupedFloat64x4
+ OpSubPairsGroupedInt16x16
+ OpSubPairsGroupedInt32x8
+ OpSubPairsGroupedUint16x16
+ OpSubPairsGroupedUint32x8
OpSubPairsInt16x8
- OpSubPairsInt16x16
OpSubPairsInt32x4
- OpSubPairsInt32x8
+ OpSubPairsSaturatedGroupedInt16x16
OpSubPairsSaturatedInt16x8
- OpSubPairsSaturatedInt16x16
OpSubPairsUint16x8
- OpSubPairsUint16x16
OpSubPairsUint32x4
- OpSubPairsUint32x8
OpSubSaturatedInt8x16
OpSubSaturatedInt8x32
OpSubSaturatedInt8x64
@@ -20354,6 +20358,84 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVMSKB128",
+ argLen: 1,
+ asm: x86.AVPMOVMSKB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
+ name: "VPMOVMSKB256",
+ argLen: 1,
+ asm: x86.AVPMOVMSKB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
+ name: "VMOVMSKPS128",
+ argLen: 1,
+ asm: x86.AVMOVMSKPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
+ name: "VMOVMSKPS256",
+ argLen: 1,
+ asm: x86.AVMOVMSKPS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
+ name: "VMOVMSKPD128",
+ argLen: 1,
+ asm: x86.AVMOVMSKPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
+ name: "VMOVMSKPD256",
+ argLen: 1,
+ asm: x86.AVMOVMSKPD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+ },
+ },
+ },
+ {
name: "Zero128",
argLen: 0,
zeroWidth: true,
@@ -27270,204 +27352,6 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPDPBUSD128",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSD256",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSD512",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked128",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked256",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked512",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDS128",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSDS256",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
- },
- outputs: []outputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- },
- },
- {
- name: "VPDPBUSDS512",
- argLen: 3,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked128",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked256",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked512",
- argLen: 4,
- resultInArg0: true,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
name: "VPDPWSSD128",
argLen: 3,
resultInArg0: true,
@@ -32104,6 +31988,87 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSDB128_128",
+ argLen: 1,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDB128_256",
+ argLen: 1,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDB128_512",
+ argLen: 1,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDBMasked128_128",
+ argLen: 2,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDBMasked128_256",
+ argLen: 2,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDBMasked128_512",
+ argLen: 2,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSDW128_128",
argLen: 1,
asm: x86.AVPMOVUSDW,
@@ -32185,6 +32150,87 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSQB128_128",
+ argLen: 1,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQB128_256",
+ argLen: 1,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQB128_512",
+ argLen: 1,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQBMasked128_128",
+ argLen: 2,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQBMasked128_256",
+ argLen: 2,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQBMasked128_512",
+ argLen: 2,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSQD128_128",
argLen: 1,
asm: x86.AVPMOVUSQD,
@@ -32347,6 +32393,32 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSWB128_128",
+ argLen: 1,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSWB128_256",
+ argLen: 1,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSWB256",
argLen: 1,
asm: x86.AVPMOVUSWB,
@@ -32360,6 +32432,34 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSWBMasked128_128",
+ argLen: 2,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSWBMasked128_256",
+ argLen: 2,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSWBMasked256",
argLen: 2,
asm: x86.AVPMOVUSWB,
@@ -45952,156 +46052,6 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPDPBUSD512load",
- auxType: auxSymOff,
- argLen: 4,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked128load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked256load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDMasked512load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSD,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDS512load",
- auxType: auxSymOff,
- argLen: 4,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked128load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked256load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
- name: "VPDPBUSDSMasked512load",
- auxType: auxSymOff,
- argLen: 5,
- resultInArg0: true,
- symEffect: SymRead,
- asm: x86.AVPDPBUSDS,
- reg: regInfo{
- inputs: []inputInfo{
- {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
- },
- },
- },
- {
name: "VPDPWSSD512load",
auxType: auxSymOff,
argLen: 4,
@@ -57269,6 +57219,54 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSDBMasked128_128Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDBMasked128_256Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSDBMasked128_512Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSDB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSDWMasked128_128Merging",
argLen: 3,
resultInArg0: true,
@@ -57317,6 +57315,54 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSQBMasked128_128Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQBMasked128_256Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSQBMasked128_512Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSQDMasked128_128Merging",
argLen: 3,
resultInArg0: true,
@@ -57413,6 +57459,38 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPMOVUSWBMasked128_128Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VPMOVUSWBMasked128_256Merging",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVPMOVUSWB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
name: "VPMOVUSWBMasked256Merging",
argLen: 3,
resultInArg0: true,
@@ -89000,6 +89078,36 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
+ name: "IsNaNFloat32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
name: "AESDecryptLastRoundUint8x16",
argLen: 2,
generic: true,
@@ -89238,67 +89346,67 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "AddPairsFloat32x8",
+ name: "AddPairsFloat64x2",
argLen: 2,
generic: true,
},
{
- name: "AddPairsFloat64x2",
+ name: "AddPairsGroupedFloat32x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsFloat64x4",
+ name: "AddPairsGroupedFloat64x4",
argLen: 2,
generic: true,
},
{
- name: "AddPairsInt16x8",
+ name: "AddPairsGroupedInt16x16",
argLen: 2,
generic: true,
},
{
- name: "AddPairsInt16x16",
+ name: "AddPairsGroupedInt32x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsInt32x4",
+ name: "AddPairsGroupedUint16x16",
argLen: 2,
generic: true,
},
{
- name: "AddPairsInt32x8",
+ name: "AddPairsGroupedUint32x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsSaturatedInt16x8",
+ name: "AddPairsInt16x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsSaturatedInt16x16",
+ name: "AddPairsInt32x4",
argLen: 2,
generic: true,
},
{
- name: "AddPairsUint16x8",
+ name: "AddPairsSaturatedGroupedInt16x16",
argLen: 2,
generic: true,
},
{
- name: "AddPairsUint16x16",
+ name: "AddPairsSaturatedInt16x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsUint32x4",
+ name: "AddPairsUint16x8",
argLen: 2,
generic: true,
},
{
- name: "AddPairsUint32x8",
+ name: "AddPairsUint32x4",
argLen: 2,
generic: true,
},
@@ -90572,36 +90680,6 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "DotProductQuadrupleInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleInt32x16",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x4",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x8",
- argLen: 3,
- generic: true,
- },
- {
- name: "DotProductQuadrupleSaturatedInt32x16",
- argLen: 3,
- generic: true,
- },
- {
name: "EqualFloat32x4",
argLen: 2,
commutative: true,
@@ -90932,102 +91010,102 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int8x16",
+ name: "ExtendLo2ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int16x8",
+ name: "ExtendLo2ToInt64Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int32x4",
+ name: "ExtendLo2ToInt64Int32x4",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint8x16",
+ name: "ExtendLo2ToUint64Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint16x8",
+ name: "ExtendLo2ToUint64Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint32x4",
+ name: "ExtendLo2ToUint64Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt32x4Int8x16",
+ name: "ExtendLo4ToInt32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt32x4Int16x8",
+ name: "ExtendLo4ToInt32Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt64x4Int8x16",
+ name: "ExtendLo4ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt64x4Int16x8",
+ name: "ExtendLo4ToInt64Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint32x4Uint8x16",
+ name: "ExtendLo4ToUint32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint32x4Uint16x8",
+ name: "ExtendLo4ToUint32Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint64x4Uint8x16",
+ name: "ExtendLo4ToUint64Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint64x4Uint16x8",
+ name: "ExtendLo4ToUint64Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt16x8Int8x16",
+ name: "ExtendLo8ToInt16Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt32x8Int8x16",
+ name: "ExtendLo8ToInt32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt64x8Int8x16",
+ name: "ExtendLo8ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint16x8Uint8x16",
+ name: "ExtendLo8ToUint16Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint32x8Uint8x16",
+ name: "ExtendLo8ToUint32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint64x8Uint8x16",
+ name: "ExtendLo8ToUint64Uint8x16",
argLen: 1,
generic: true,
},
@@ -91707,42 +91785,6 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "IsNanFloat32x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x16",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x2",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
name: "LeadingZerosInt32x4",
argLen: 1,
generic: true,
@@ -93370,17 +93412,17 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "SaturateToInt16ConcatInt32x4",
+ name: "SaturateToInt16ConcatGroupedInt32x8",
argLen: 2,
generic: true,
},
{
- name: "SaturateToInt16ConcatInt32x8",
+ name: "SaturateToInt16ConcatGroupedInt32x16",
argLen: 2,
generic: true,
},
{
- name: "SaturateToInt16ConcatInt32x16",
+ name: "SaturateToInt16ConcatInt32x4",
argLen: 2,
generic: true,
},
@@ -93430,62 +93472,62 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "SaturateToUint8Int16x8",
+ name: "SaturateToUint8Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int16x16",
+ name: "SaturateToUint8Uint16x16",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int32x4",
+ name: "SaturateToUint8Uint16x32",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int32x8",
+ name: "SaturateToUint8Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int32x16",
+ name: "SaturateToUint8Uint32x8",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int64x2",
+ name: "SaturateToUint8Uint32x16",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int64x4",
+ name: "SaturateToUint8Uint64x2",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Int64x8",
+ name: "SaturateToUint8Uint64x4",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint8Uint16x32",
+ name: "SaturateToUint8Uint64x8",
argLen: 1,
generic: true,
},
{
- name: "SaturateToUint16ConcatUint32x4",
+ name: "SaturateToUint16ConcatGroupedInt32x8",
argLen: 2,
generic: true,
},
{
- name: "SaturateToUint16ConcatUint32x8",
+ name: "SaturateToUint16ConcatGroupedInt32x16",
argLen: 2,
generic: true,
},
{
- name: "SaturateToUint16ConcatUint32x16",
+ name: "SaturateToUint16ConcatInt32x4",
argLen: 2,
generic: true,
},
@@ -94430,67 +94472,67 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "SubPairsFloat32x8",
+ name: "SubPairsFloat64x2",
argLen: 2,
generic: true,
},
{
- name: "SubPairsFloat64x2",
+ name: "SubPairsGroupedFloat32x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsFloat64x4",
+ name: "SubPairsGroupedFloat64x4",
argLen: 2,
generic: true,
},
{
- name: "SubPairsInt16x8",
+ name: "SubPairsGroupedInt16x16",
argLen: 2,
generic: true,
},
{
- name: "SubPairsInt16x16",
+ name: "SubPairsGroupedInt32x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsInt32x4",
+ name: "SubPairsGroupedUint16x16",
argLen: 2,
generic: true,
},
{
- name: "SubPairsInt32x8",
+ name: "SubPairsGroupedUint32x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsSaturatedInt16x8",
+ name: "SubPairsInt16x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsSaturatedInt16x16",
+ name: "SubPairsInt32x4",
argLen: 2,
generic: true,
},
{
- name: "SubPairsUint16x8",
+ name: "SubPairsSaturatedGroupedInt16x16",
argLen: 2,
generic: true,
},
{
- name: "SubPairsUint16x16",
+ name: "SubPairsSaturatedInt16x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsUint32x4",
+ name: "SubPairsUint16x8",
argLen: 2,
generic: true,
},
{
- name: "SubPairsUint32x8",
+ name: "SubPairsUint32x4",
argLen: 2,
generic: true,
},
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 19f16e1cbb..e84bf19c83 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1006,10 +1006,6 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v)
case OpAMD64VPACKUSDWMasked512:
return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v)
- case OpAMD64VPADDD128:
- return rewriteValueAMD64_OpAMD64VPADDD128(v)
- case OpAMD64VPADDD256:
- return rewriteValueAMD64_OpAMD64VPADDD256(v)
case OpAMD64VPADDD512:
return rewriteValueAMD64_OpAMD64VPADDD512(v)
case OpAMD64VPADDDMasked128:
@@ -1126,22 +1122,6 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v)
case OpAMD64VPCMPUQMasked512:
return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v)
- case OpAMD64VPDPBUSD512:
- return rewriteValueAMD64_OpAMD64VPDPBUSD512(v)
- case OpAMD64VPDPBUSDMasked128:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v)
- case OpAMD64VPDPBUSDMasked256:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v)
- case OpAMD64VPDPBUSDMasked512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v)
- case OpAMD64VPDPBUSDS512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v)
- case OpAMD64VPDPBUSDSMasked128:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v)
- case OpAMD64VPDPBUSDSMasked256:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v)
- case OpAMD64VPDPBUSDSMasked512:
- return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v)
case OpAMD64VPDPWSSD512:
return rewriteValueAMD64_OpAMD64VPDPWSSD512(v)
case OpAMD64VPDPWSSDMasked128:
@@ -1402,6 +1382,10 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v)
case OpAMD64VPOPCNTQMasked512:
return rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v)
+ case OpAMD64VPOR128:
+ return rewriteValueAMD64_OpAMD64VPOR128(v)
+ case OpAMD64VPOR256:
+ return rewriteValueAMD64_OpAMD64VPOR256(v)
case OpAMD64VPORD512:
return rewriteValueAMD64_OpAMD64VPORD512(v)
case OpAMD64VPORDMasked128:
@@ -2133,45 +2117,45 @@ func rewriteValueAMD64(v *Value) bool {
case OpAddPairsFloat32x4:
v.Op = OpAMD64VHADDPS128
return true
- case OpAddPairsFloat32x8:
- v.Op = OpAMD64VHADDPS256
- return true
case OpAddPairsFloat64x2:
v.Op = OpAMD64VHADDPD128
return true
- case OpAddPairsFloat64x4:
+ case OpAddPairsGroupedFloat32x8:
+ v.Op = OpAMD64VHADDPS256
+ return true
+ case OpAddPairsGroupedFloat64x4:
v.Op = OpAMD64VHADDPD256
return true
- case OpAddPairsInt16x16:
+ case OpAddPairsGroupedInt16x16:
+ v.Op = OpAMD64VPHADDW256
+ return true
+ case OpAddPairsGroupedInt32x8:
+ v.Op = OpAMD64VPHADDD256
+ return true
+ case OpAddPairsGroupedUint16x16:
v.Op = OpAMD64VPHADDW256
return true
+ case OpAddPairsGroupedUint32x8:
+ v.Op = OpAMD64VPHADDD256
+ return true
case OpAddPairsInt16x8:
v.Op = OpAMD64VPHADDW128
return true
case OpAddPairsInt32x4:
v.Op = OpAMD64VPHADDD128
return true
- case OpAddPairsInt32x8:
- v.Op = OpAMD64VPHADDD256
- return true
- case OpAddPairsSaturatedInt16x16:
+ case OpAddPairsSaturatedGroupedInt16x16:
v.Op = OpAMD64VPHADDSW256
return true
case OpAddPairsSaturatedInt16x8:
v.Op = OpAMD64VPHADDSW128
return true
- case OpAddPairsUint16x16:
- v.Op = OpAMD64VPHADDW256
- return true
case OpAddPairsUint16x8:
v.Op = OpAMD64VPHADDW128
return true
case OpAddPairsUint32x4:
v.Op = OpAMD64VPHADDD128
return true
- case OpAddPairsUint32x8:
- v.Op = OpAMD64VPHADDD256
- return true
case OpAddPtr:
v.Op = OpAMD64ADDQ
return true
@@ -3066,19 +3050,25 @@ func rewriteValueAMD64(v *Value) bool {
case OpCvtMask32x16to16:
return rewriteValueAMD64_OpCvtMask32x16to16(v)
case OpCvtMask32x4to8:
- return rewriteValueAMD64_OpCvtMask32x4to8(v)
+ v.Op = OpAMD64VMOVMSKPS128
+ return true
case OpCvtMask32x8to8:
- return rewriteValueAMD64_OpCvtMask32x8to8(v)
+ v.Op = OpAMD64VMOVMSKPS256
+ return true
case OpCvtMask64x2to8:
- return rewriteValueAMD64_OpCvtMask64x2to8(v)
+ v.Op = OpAMD64VMOVMSKPD128
+ return true
case OpCvtMask64x4to8:
- return rewriteValueAMD64_OpCvtMask64x4to8(v)
+ v.Op = OpAMD64VMOVMSKPD256
+ return true
case OpCvtMask64x8to8:
return rewriteValueAMD64_OpCvtMask64x8to8(v)
case OpCvtMask8x16to16:
- return rewriteValueAMD64_OpCvtMask8x16to16(v)
+ v.Op = OpAMD64VPMOVMSKB128
+ return true
case OpCvtMask8x32to32:
- return rewriteValueAMD64_OpCvtMask8x32to32(v)
+ v.Op = OpAMD64VPMOVMSKB256
+ return true
case OpCvtMask8x64to64:
return rewriteValueAMD64_OpCvtMask8x64to64(v)
case OpDiv128u:
@@ -3142,24 +3132,6 @@ func rewriteValueAMD64(v *Value) bool {
case OpDotProductPairsSaturatedUint8x64:
v.Op = OpAMD64VPMADDUBSW512
return true
- case OpDotProductQuadrupleInt32x16:
- v.Op = OpAMD64VPDPBUSD512
- return true
- case OpDotProductQuadrupleInt32x4:
- v.Op = OpAMD64VPDPBUSD128
- return true
- case OpDotProductQuadrupleInt32x8:
- v.Op = OpAMD64VPDPBUSD256
- return true
- case OpDotProductQuadrupleSaturatedInt32x16:
- v.Op = OpAMD64VPDPBUSDS512
- return true
- case OpDotProductQuadrupleSaturatedInt32x4:
- v.Op = OpAMD64VPDPBUSDS128
- return true
- case OpDotProductQuadrupleSaturatedInt32x8:
- v.Op = OpAMD64VPDPBUSDS256
- return true
case OpEq16:
return rewriteValueAMD64_OpEq16(v)
case OpEq32:
@@ -3312,64 +3284,64 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpExpandUint8x32(v)
case OpExpandUint8x64:
return rewriteValueAMD64_OpExpandUint8x64(v)
- case OpExtendLo2ToInt64x2Int16x8:
+ case OpExtendLo2ToInt64Int16x8:
v.Op = OpAMD64VPMOVSXWQ128
return true
- case OpExtendLo2ToInt64x2Int32x4:
+ case OpExtendLo2ToInt64Int32x4:
v.Op = OpAMD64VPMOVSXDQ128
return true
- case OpExtendLo2ToInt64x2Int8x16:
+ case OpExtendLo2ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ128
return true
- case OpExtendLo2ToUint64x2Uint16x8:
+ case OpExtendLo2ToUint64Uint16x8:
v.Op = OpAMD64VPMOVZXWQ128
return true
- case OpExtendLo2ToUint64x2Uint32x4:
+ case OpExtendLo2ToUint64Uint32x4:
v.Op = OpAMD64VPMOVZXDQ128
return true
- case OpExtendLo2ToUint64x2Uint8x16:
+ case OpExtendLo2ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ128
return true
- case OpExtendLo4ToInt32x4Int16x8:
+ case OpExtendLo4ToInt32Int16x8:
v.Op = OpAMD64VPMOVSXWD128
return true
- case OpExtendLo4ToInt32x4Int8x16:
+ case OpExtendLo4ToInt32Int8x16:
v.Op = OpAMD64VPMOVSXBD128
return true
- case OpExtendLo4ToInt64x4Int16x8:
+ case OpExtendLo4ToInt64Int16x8:
v.Op = OpAMD64VPMOVSXWQ256
return true
- case OpExtendLo4ToInt64x4Int8x16:
+ case OpExtendLo4ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ256
return true
- case OpExtendLo4ToUint32x4Uint16x8:
+ case OpExtendLo4ToUint32Uint16x8:
v.Op = OpAMD64VPMOVZXWD128
return true
- case OpExtendLo4ToUint32x4Uint8x16:
+ case OpExtendLo4ToUint32Uint8x16:
v.Op = OpAMD64VPMOVZXBD128
return true
- case OpExtendLo4ToUint64x4Uint16x8:
+ case OpExtendLo4ToUint64Uint16x8:
v.Op = OpAMD64VPMOVZXWQ256
return true
- case OpExtendLo4ToUint64x4Uint8x16:
+ case OpExtendLo4ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ256
return true
- case OpExtendLo8ToInt16x8Int8x16:
+ case OpExtendLo8ToInt16Int8x16:
v.Op = OpAMD64VPMOVSXBW128
return true
- case OpExtendLo8ToInt32x8Int8x16:
+ case OpExtendLo8ToInt32Int8x16:
v.Op = OpAMD64VPMOVSXBD256
return true
- case OpExtendLo8ToInt64x8Int8x16:
+ case OpExtendLo8ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ512
return true
- case OpExtendLo8ToUint16x8Uint8x16:
+ case OpExtendLo8ToUint16Uint8x16:
v.Op = OpAMD64VPMOVZXBW128
return true
- case OpExtendLo8ToUint32x8Uint8x16:
+ case OpExtendLo8ToUint32Uint8x16:
v.Op = OpAMD64VPMOVZXBD256
return true
- case OpExtendLo8ToUint64x8Uint8x16:
+ case OpExtendLo8ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ512
return true
case OpExtendToInt16Int8x16:
@@ -3811,18 +3783,18 @@ func rewriteValueAMD64(v *Value) bool {
return true
case OpIsInBounds:
return rewriteValueAMD64_OpIsInBounds(v)
- case OpIsNanFloat32x16:
- return rewriteValueAMD64_OpIsNanFloat32x16(v)
- case OpIsNanFloat32x4:
- return rewriteValueAMD64_OpIsNanFloat32x4(v)
- case OpIsNanFloat32x8:
- return rewriteValueAMD64_OpIsNanFloat32x8(v)
- case OpIsNanFloat64x2:
- return rewriteValueAMD64_OpIsNanFloat64x2(v)
- case OpIsNanFloat64x4:
- return rewriteValueAMD64_OpIsNanFloat64x4(v)
- case OpIsNanFloat64x8:
- return rewriteValueAMD64_OpIsNanFloat64x8(v)
+ case OpIsNaNFloat32x16:
+ return rewriteValueAMD64_OpIsNaNFloat32x16(v)
+ case OpIsNaNFloat32x4:
+ return rewriteValueAMD64_OpIsNaNFloat32x4(v)
+ case OpIsNaNFloat32x8:
+ return rewriteValueAMD64_OpIsNaNFloat32x8(v)
+ case OpIsNaNFloat64x2:
+ return rewriteValueAMD64_OpIsNaNFloat64x2(v)
+ case OpIsNaNFloat64x4:
+ return rewriteValueAMD64_OpIsNaNFloat64x4(v)
+ case OpIsNaNFloat64x8:
+ return rewriteValueAMD64_OpIsNaNFloat64x8(v)
case OpIsNonNil:
return rewriteValueAMD64_OpIsNonNil(v)
case OpIsSliceInBounds:
@@ -5040,15 +5012,15 @@ func rewriteValueAMD64(v *Value) bool {
case OpSHA256TwoRoundsUint32x4:
v.Op = OpAMD64SHA256RNDS2128
return true
- case OpSaturateToInt16ConcatInt32x16:
+ case OpSaturateToInt16ConcatGroupedInt32x16:
v.Op = OpAMD64VPACKSSDW512
return true
+ case OpSaturateToInt16ConcatGroupedInt32x8:
+ v.Op = OpAMD64VPACKSSDW256
+ return true
case OpSaturateToInt16ConcatInt32x4:
v.Op = OpAMD64VPACKSSDW128
return true
- case OpSaturateToInt16ConcatInt32x8:
- v.Op = OpAMD64VPACKSSDW256
- return true
case OpSaturateToInt16Int32x16:
v.Op = OpAMD64VPMOVSDW256
return true
@@ -5103,15 +5075,15 @@ func rewriteValueAMD64(v *Value) bool {
case OpSaturateToInt8Int64x8:
v.Op = OpAMD64VPMOVSQB128_512
return true
- case OpSaturateToUint16ConcatUint32x16:
+ case OpSaturateToUint16ConcatGroupedInt32x16:
v.Op = OpAMD64VPACKUSDW512
return true
- case OpSaturateToUint16ConcatUint32x4:
- v.Op = OpAMD64VPACKUSDW128
- return true
- case OpSaturateToUint16ConcatUint32x8:
+ case OpSaturateToUint16ConcatGroupedInt32x8:
v.Op = OpAMD64VPACKUSDW256
return true
+ case OpSaturateToUint16ConcatInt32x4:
+ v.Op = OpAMD64VPACKUSDW128
+ return true
case OpSaturateToUint16Uint32x16:
v.Op = OpAMD64VPMOVUSDW256
return true
@@ -5139,32 +5111,32 @@ func rewriteValueAMD64(v *Value) bool {
case OpSaturateToUint32Uint64x8:
v.Op = OpAMD64VPMOVUSQD256
return true
- case OpSaturateToUint8Int16x16:
- v.Op = OpAMD64VPMOVSWB128_256
+ case OpSaturateToUint8Uint16x16:
+ v.Op = OpAMD64VPMOVUSWB128_256
return true
- case OpSaturateToUint8Int16x8:
- v.Op = OpAMD64VPMOVSWB128_128
+ case OpSaturateToUint8Uint16x32:
+ v.Op = OpAMD64VPMOVUSWB256
return true
- case OpSaturateToUint8Int32x16:
- v.Op = OpAMD64VPMOVSDB128_512
+ case OpSaturateToUint8Uint16x8:
+ v.Op = OpAMD64VPMOVUSWB128_128
return true
- case OpSaturateToUint8Int32x4:
- v.Op = OpAMD64VPMOVSDB128_128
+ case OpSaturateToUint8Uint32x16:
+ v.Op = OpAMD64VPMOVUSDB128_512
return true
- case OpSaturateToUint8Int32x8:
- v.Op = OpAMD64VPMOVSDB128_256
+ case OpSaturateToUint8Uint32x4:
+ v.Op = OpAMD64VPMOVUSDB128_128
return true
- case OpSaturateToUint8Int64x2:
- v.Op = OpAMD64VPMOVSQB128_128
+ case OpSaturateToUint8Uint32x8:
+ v.Op = OpAMD64VPMOVUSDB128_256
return true
- case OpSaturateToUint8Int64x4:
- v.Op = OpAMD64VPMOVSQB128_256
+ case OpSaturateToUint8Uint64x2:
+ v.Op = OpAMD64VPMOVUSQB128_128
return true
- case OpSaturateToUint8Int64x8:
- v.Op = OpAMD64VPMOVSQB128_512
+ case OpSaturateToUint8Uint64x4:
+ v.Op = OpAMD64VPMOVUSQB128_256
return true
- case OpSaturateToUint8Uint16x32:
- v.Op = OpAMD64VPMOVUSWB256
+ case OpSaturateToUint8Uint64x8:
+ v.Op = OpAMD64VPMOVUSQB128_512
return true
case OpScaleFloat32x16:
v.Op = OpAMD64VSCALEFPS512
@@ -5898,45 +5870,45 @@ func rewriteValueAMD64(v *Value) bool {
case OpSubPairsFloat32x4:
v.Op = OpAMD64VHSUBPS128
return true
- case OpSubPairsFloat32x8:
- v.Op = OpAMD64VHSUBPS256
- return true
case OpSubPairsFloat64x2:
v.Op = OpAMD64VHSUBPD128
return true
- case OpSubPairsFloat64x4:
+ case OpSubPairsGroupedFloat32x8:
+ v.Op = OpAMD64VHSUBPS256
+ return true
+ case OpSubPairsGroupedFloat64x4:
v.Op = OpAMD64VHSUBPD256
return true
- case OpSubPairsInt16x16:
+ case OpSubPairsGroupedInt16x16:
+ v.Op = OpAMD64VPHSUBW256
+ return true
+ case OpSubPairsGroupedInt32x8:
+ v.Op = OpAMD64VPHSUBD256
+ return true
+ case OpSubPairsGroupedUint16x16:
v.Op = OpAMD64VPHSUBW256
return true
+ case OpSubPairsGroupedUint32x8:
+ v.Op = OpAMD64VPHSUBD256
+ return true
case OpSubPairsInt16x8:
v.Op = OpAMD64VPHSUBW128
return true
case OpSubPairsInt32x4:
v.Op = OpAMD64VPHSUBD128
return true
- case OpSubPairsInt32x8:
- v.Op = OpAMD64VPHSUBD256
- return true
- case OpSubPairsSaturatedInt16x16:
+ case OpSubPairsSaturatedGroupedInt16x16:
v.Op = OpAMD64VPHSUBSW256
return true
case OpSubPairsSaturatedInt16x8:
v.Op = OpAMD64VPHSUBSW128
return true
- case OpSubPairsUint16x16:
- v.Op = OpAMD64VPHSUBW256
- return true
case OpSubPairsUint16x8:
v.Op = OpAMD64VPHSUBW128
return true
case OpSubPairsUint32x4:
v.Op = OpAMD64VPHSUBD128
return true
- case OpSubPairsUint32x8:
- v.Op = OpAMD64VPHSUBD256
- return true
case OpSubPtr:
v.Op = OpAMD64SUBQ
return true
@@ -28763,7 +28735,7 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28779,7 +28751,7 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -28792,7 +28764,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28809,7 +28781,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -28822,7 +28794,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28839,7 +28811,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -28852,7 +28824,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28869,7 +28841,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -28881,7 +28853,7 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28897,7 +28869,7 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPS512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -28910,7 +28882,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28927,7 +28899,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPSMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -28940,7 +28912,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28957,7 +28929,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPSMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -28970,7 +28942,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -28987,7 +28959,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VCMPPSMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32605,7 +32577,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32621,7 +32593,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQB128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32633,7 +32605,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32649,7 +32621,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQB256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32661,7 +32633,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32677,7 +32649,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQB512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32690,7 +32662,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32707,7 +32679,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32720,7 +32692,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32737,7 +32709,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32750,7 +32722,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32767,7 +32739,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32779,7 +32751,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32795,7 +32767,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQB128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32807,7 +32779,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32823,7 +32795,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQB256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32835,7 +32807,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32851,7 +32823,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQB512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -32864,7 +32836,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32881,7 +32853,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQBMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32894,7 +32866,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32911,7 +32883,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQBMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -32924,7 +32896,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -32941,7 +32913,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VGF2P8AFFINEQBMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -33775,6 +33747,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask)
+ // result: (VPMOVUSWBMasked128_128 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSWB128_128 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSWBMasked128_128)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask)
// result: (VPSHLDWMasked128 [a] x y mask)
for {
@@ -34327,6 +34311,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask)
+ // result: (VPMOVUSWBMasked128_256 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSWB128_256 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSWBMasked128_256)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask)
// result: (VPMOVUSWBMasked256 x mask)
for {
@@ -35294,34 +35290,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask)
- // result: (VPDPBUSDMasked128 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD128 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked128)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask)
- // result: (VPDPBUSDSMasked128 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS128 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked128)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask)
// result: (VPMOVSXDQMasked128 x mask)
for {
@@ -35607,6 +35575,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask)
+ // result: (VPMOVUSDBMasked128_128 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSDB128_128 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSDBMasked128_128)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask)
// result: (VPACKUSDWMasked128 x y mask)
for {
@@ -36129,34 +36109,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask)
- // result: (VPDPBUSDMasked256 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD256 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked256)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask)
- // result: (VPDPBUSDSMasked256 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS256 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked256)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask)
// result: (VPMOVSXDQMasked256 x mask)
for {
@@ -36480,6 +36432,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask)
+ // result: (VPMOVUSDBMasked128_256 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSDB128_256 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSDBMasked128_256)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask)
// result: (VPACKUSDWMasked256 x y mask)
for {
@@ -37052,34 +37016,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
v.AddArg3(x, y, mask)
return true
}
- // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask)
- // result: (VPDPBUSDMasked512 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSD512 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDMasked512)
- v.AddArg4(x, y, z, mask)
- return true
- }
- // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask)
- // result: (VPDPBUSDSMasked512 x y z mask)
- for {
- if v_0.Op != OpAMD64VPDPBUSDS512 {
- break
- }
- z := v_0.Args[2]
- x := v_0.Args[0]
- y := v_0.Args[1]
- mask := v_1
- v.reset(OpAMD64VPDPBUSDSMasked512)
- v.AddArg4(x, y, z, mask)
- return true
- }
// match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask)
// result: (VPMOVSXDQMasked512 x mask)
for {
@@ -37416,6 +37352,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
v.AddArg3(x, y, mask)
return true
}
+ // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask)
+ // result: (VPMOVUSDBMasked128_512 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSDB128_512 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSDBMasked128_512)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask)
// result: (VPACKUSDWMasked512 x y mask)
for {
@@ -38259,6 +38207,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask)
+ // result: (VPMOVUSQBMasked128_128 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSQB128_128 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSQBMasked128_128)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask)
// result: (VPMOVUSQWMasked128_128 x mask)
for {
@@ -39100,6 +39060,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask)
+ // result: (VPMOVUSQBMasked128_256 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSQB128_256 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSQBMasked128_256)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask)
// result: (VPMOVUSQWMasked128_256 x mask)
for {
@@ -39920,6 +39892,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
+ // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask)
+ // result: (VPMOVUSQBMasked128_512 x mask)
+ for {
+ if v_0.Op != OpAMD64VPMOVUSQB128_512 {
+ break
+ }
+ x := v_0.Args[0]
+ mask := v_1
+ v.reset(OpAMD64VPMOVUSQBMasked128_512)
+ v.AddArg2(x, mask)
+ return true
+ }
// match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask)
// result: (VPMOVUSQWMasked128_512 x mask)
for {
@@ -42407,151 +42391,9 @@ func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool {
}
return false
}
-func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z)
- // result: (VPDPBUSD128 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD128 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero128 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD128)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z)
- // result: (VPDPBUSDS128 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS128 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero128 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS128)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z)
- // result: (VPDPBUSD256 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD256 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero256 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD256)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z)
- // result: (VPDPBUSDS256 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS256 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero256 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS256)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- // match: (VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z)
- // result: (VPDPBUSD512 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSD512 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero512 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSD512)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
- // match: (VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z)
- // result: (VPDPBUSDS512 <t> z x y)
- for {
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- if v_0.Op != OpAMD64VPDPBUSDS512 {
- continue
- }
- y := v_0.Args[2]
- v_0_0 := v_0.Args[0]
- if v_0_0.Op != OpAMD64Zero512 {
- continue
- }
- t := v_0_0.Type
- x := v_0.Args[1]
- z := v_1
- v.reset(OpAMD64VPDPBUSDS512)
- v.Type = t
- v.AddArg3(z, x, y)
- return true
- }
- break
- }
// match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
// result: (VPADDD512load {sym} [off] x ptr mem)
@@ -44109,6 +43951,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
v.AddArg3(dst, x, mask)
return true
}
+ // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask)
+ // result: (VPMOVUSDBMasked128_512Merging dst x mask)
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSDB128_512 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ v.reset(OpAMD64VPMOVUSDBMasked128_512Merging)
+ v.AddArg3(dst, x, mask)
+ return true
+ }
// match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask)
// result: (VPMOVUSDWMasked256Merging dst x mask)
for {
@@ -44869,6 +44724,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
v.AddArg3(dst, x, mask)
return true
}
+ // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask)
+ // result: (VPMOVUSQBMasked128_512Merging dst x mask)
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSQB128_512 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ v.reset(OpAMD64VPMOVUSQBMasked128_512Merging)
+ v.AddArg3(dst, x, mask)
+ return true
+ }
// match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask)
// result: (VPMOVUSQDMasked256Merging dst x mask)
for {
@@ -47797,6 +47665,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSDB128_128 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSDBMasked128_128Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
@@ -47816,6 +47703,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSQB128_128 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSQBMasked128_128Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
@@ -47854,6 +47760,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSWB128_128 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSWBMasked128_128Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
@@ -50990,6 +50915,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSDB128_256 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSDBMasked128_256Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
@@ -51009,6 +50953,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSQB128_256 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSQBMasked128_256Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
@@ -51047,6 +51010,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
+ // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask)
+ // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+ // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+ for {
+ dst := v_0
+ if v_1.Op != OpAMD64VPMOVUSWB128_256 {
+ break
+ }
+ x := v_1.Args[0]
+ mask := v_2
+ if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+ break
+ }
+ v.reset(OpAMD64VPMOVUSWBMasked128_256Merging)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+ v0.AddArg(mask)
+ v.AddArg3(dst, x, v0)
+ return true
+ }
// match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
@@ -52553,7 +52535,7 @@ func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52569,7 +52551,7 @@ func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -52582,7 +52564,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52599,7 +52581,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52612,7 +52594,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52629,7 +52611,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52642,7 +52624,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52659,7 +52641,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52785,7 +52767,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52801,7 +52783,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -52814,7 +52796,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52831,7 +52813,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52844,7 +52826,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52861,7 +52843,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52874,7 +52856,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52891,7 +52873,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52903,7 +52885,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52919,7 +52901,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -52932,7 +52914,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52949,7 +52931,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52962,7 +52944,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -52979,7 +52961,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -52992,7 +52974,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -53009,7 +52991,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -53021,7 +53003,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -53037,7 +53019,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -53050,7 +53032,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -53067,7 +53049,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -53080,7 +53062,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -53097,7 +53079,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -53110,7 +53092,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -53127,257 +53109,13 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPCMPUQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
}
return false
}
-func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSD512load {sym} [off] x y ptr mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSD512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg4(x, y, ptr, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload128 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked128load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload256 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked256load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDMasked512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDS512load {sym} [off] x y ptr mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDS512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg4(x, y, ptr, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload128 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked128load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload256 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked256load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
-func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool {
- v_3 := v.Args[3]
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
- // cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
- for {
- x := v_0
- y := v_1
- l := v_2
- if l.Op != OpAMD64VMOVDQUload512 {
- break
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_3
- if !(canMergeLoad(v, l) && clobber(l)) {
- break
- }
- v.reset(OpAMD64VPDPBUSDSMasked512load)
- v.AuxInt = int32ToAuxInt(off)
- v.Aux = symToAux(sym)
- v.AddArg5(x, y, ptr, mask, mem)
- return true
- }
- return false
-}
func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -57040,9 +56778,173 @@ func rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v *Value) bool {
}
return false
}
+func rewriteValueAMD64_OpAMD64VPOR128(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPOR128 (VCMPPS128 [3] x x) (VCMPPS128 [3] y y))
+ // result: (VCMPPS128 [3] x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_1.AuxInt) != 3 {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VCMPPS128)
+ v.AuxInt = uint8ToAuxInt(3)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (VPOR128 (VCMPPD128 [3] x x) (VCMPPD128 [3] y y))
+ // result: (VCMPPD128 [3] x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_1.AuxInt) != 3 {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VCMPPD128)
+ v.AuxInt = uint8ToAuxInt(3)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64VPOR256(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (VPOR256 (VCMPPS256 [3] x x) (VCMPPS256 [3] y y))
+ // result: (VCMPPS256 [3] x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_1.AuxInt) != 3 {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VCMPPS256)
+ v.AuxInt = uint8ToAuxInt(3)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ // match: (VPOR256 (VCMPPD256 [3] x x) (VCMPPD256 [3] y y))
+ // result: (VCMPPD256 [3] x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0.Args[1]
+ if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_1.AuxInt) != 3 {
+ continue
+ }
+ y := v_1.Args[1]
+ if y != v_1.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VCMPPD256)
+ v.AuxInt = uint8ToAuxInt(3)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64VPORD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
+ b := v.Block
+ typ := &b.Func.Config.Types
+ // match: (VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y)))
+ // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VPMOVMToVec32x16 {
+ continue
+ }
+ v_0_0 := v_0.Args[0]
+ if v_0_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0_0.Args[1]
+ if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec32x16 {
+ continue
+ }
+ v_1_0 := v_1.Args[0]
+ if v_1_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1_0.AuxInt) != 3 {
+ continue
+ }
+ y := v_1_0.Args[1]
+ if y != v_1_0.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VPMOVMToVec32x16)
+ v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+ v0.AuxInt = uint8ToAuxInt(3)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y)))
+ // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpAMD64VPMOVMToVec64x8 {
+ continue
+ }
+ v_0_0 := v_0.Args[0]
+ if v_0_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_0_0.AuxInt) != 3 {
+ continue
+ }
+ x := v_0_0.Args[1]
+ if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec64x8 {
+ continue
+ }
+ v_1_0 := v_1.Args[0]
+ if v_1_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_1_0.AuxInt) != 3 {
+ continue
+ }
+ y := v_1_0.Args[1]
+ if y != v_1_0.Args[0] {
+ continue
+ }
+ v.reset(OpAMD64VPMOVMToVec64x8)
+ v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+ v0.AuxInt = uint8ToAuxInt(3)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
// match: (VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
// result: (VPORD512load {sym} [off] x ptr mem)
@@ -57296,7 +57198,7 @@ func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57311,7 +57213,7 @@ func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool {
break
}
v.reset(OpAMD64VPROLD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57322,7 +57224,7 @@ func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57337,7 +57239,7 @@ func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool {
break
}
v.reset(OpAMD64VPROLD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57348,7 +57250,7 @@ func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57363,7 +57265,7 @@ func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool {
break
}
v.reset(OpAMD64VPROLD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57375,7 +57277,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57391,7 +57293,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPROLDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57403,7 +57305,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57419,7 +57321,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPROLDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57431,7 +57333,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57447,7 +57349,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPROLDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57458,7 +57360,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57473,7 +57375,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQ128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57484,7 +57386,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57499,7 +57401,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQ256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57510,7 +57412,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57525,7 +57427,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57537,7 +57439,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57553,7 +57455,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57565,7 +57467,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57581,7 +57483,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57593,7 +57495,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57609,7 +57511,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPROLQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -57956,7 +57858,7 @@ func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57971,7 +57873,7 @@ func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool {
break
}
v.reset(OpAMD64VPRORD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -57982,7 +57884,7 @@ func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -57997,7 +57899,7 @@ func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool {
break
}
v.reset(OpAMD64VPRORD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -58008,7 +57910,7 @@ func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58023,7 +57925,7 @@ func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool {
break
}
v.reset(OpAMD64VPRORD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -58035,7 +57937,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58051,7 +57953,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPRORDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58063,7 +57965,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58079,7 +57981,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPRORDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58091,7 +57993,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58107,7 +58009,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPRORDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58118,7 +58020,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58133,7 +58035,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQ128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -58144,7 +58046,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58159,7 +58061,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQ256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -58170,7 +58072,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58185,7 +58087,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -58197,7 +58099,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58213,7 +58115,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58225,7 +58127,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58241,7 +58143,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58253,7 +58155,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -58269,7 +58171,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPRORQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -58617,7 +58519,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58633,7 +58535,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58645,7 +58547,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58661,7 +58563,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58673,7 +58575,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58689,7 +58591,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58702,7 +58604,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58719,7 +58621,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -58732,7 +58634,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58749,7 +58651,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -58762,7 +58664,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58779,7 +58681,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -58791,7 +58693,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58807,7 +58709,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQ128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58819,7 +58721,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58835,7 +58737,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQ256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58847,7 +58749,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58863,7 +58765,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -58876,7 +58778,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58893,7 +58795,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -58906,7 +58808,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58923,7 +58825,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -58936,7 +58838,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -58953,7 +58855,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHLDQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59325,7 +59227,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59341,7 +59243,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59353,7 +59255,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59369,7 +59271,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59381,7 +59283,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59397,7 +59299,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59410,7 +59312,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59427,7 +59329,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59440,7 +59342,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59457,7 +59359,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59470,7 +59372,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59487,7 +59389,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59499,7 +59401,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59515,7 +59417,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQ128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59527,7 +59429,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59543,7 +59445,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQ256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59555,7 +59457,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59571,7 +59473,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -59584,7 +59486,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59601,7 +59503,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59614,7 +59516,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59631,7 +59533,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -59644,7 +59546,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
+ // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -59661,7 +59563,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHRDQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, ptr, mask, mem)
return true
@@ -60032,7 +59934,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60047,7 +59949,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHUFD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -60059,7 +59961,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60075,7 +59977,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VPSHUFDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60087,7 +59989,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60103,7 +60005,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VPSHUFDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60115,7 +60017,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60131,7 +60033,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VPSHUFDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60196,7 +60098,7 @@ func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60211,7 +60113,7 @@ func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLD512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -60243,7 +60145,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60259,7 +60161,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLDMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60291,7 +60193,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60307,7 +60209,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLDMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60339,7 +60241,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60355,7 +60257,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLDMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60420,7 +60322,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60435,7 +60337,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLQ512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -60467,7 +60369,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60483,7 +60385,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLQMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60515,7 +60417,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60531,7 +60433,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLQMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60563,7 +60465,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -60579,7 +60481,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSLLQMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -60986,7 +60888,7 @@ func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61001,7 +60903,7 @@ func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAD512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61033,7 +60935,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61049,7 +60951,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRADMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61081,7 +60983,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61097,7 +60999,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRADMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61129,7 +61031,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61145,7 +61047,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRADMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61174,7 +61076,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61189,7 +61091,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQ128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61218,7 +61120,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61233,7 +61135,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQ256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61262,7 +61164,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61277,7 +61179,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQ512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61309,7 +61211,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61325,7 +61227,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61357,7 +61259,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61373,7 +61275,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61405,7 +61307,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61421,7 +61323,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRAQMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61828,7 +61730,7 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61843,7 +61745,7 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLD512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61855,7 +61757,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61871,7 +61773,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLDMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61883,7 +61785,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61899,7 +61801,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLDMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61911,7 +61813,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61927,7 +61829,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLDMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61938,7 +61840,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61953,7 +61855,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLQ512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -61965,7 +61867,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -61981,7 +61883,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLQMasked128constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -61993,7 +61895,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -62009,7 +61911,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLQMasked256constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -62021,7 +61923,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -62037,7 +61939,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool {
break
}
v.reset(OpAMD64VPSRLQMasked512constload)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -62506,7 +62408,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62523,7 +62425,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -62536,7 +62438,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62553,7 +62455,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -62566,7 +62468,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62583,7 +62485,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -62596,7 +62498,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62613,7 +62515,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGQ128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -62626,7 +62528,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62643,7 +62545,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGQ256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -62656,7 +62558,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool {
v_0 := v.Args[0]
// match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+ // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -62673,7 +62575,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool {
break
}
v.reset(OpAMD64VPTERNLOGQ512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg4(x, y, ptr, mem)
return true
@@ -63306,7 +63208,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63321,7 +63223,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63332,7 +63234,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63347,7 +63249,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63358,7 +63260,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63373,7 +63275,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63385,7 +63287,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63401,7 +63303,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63413,7 +63315,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63429,7 +63331,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63441,7 +63343,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63457,7 +63359,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63468,7 +63370,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63483,7 +63385,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPS128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63494,7 +63396,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63509,7 +63411,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPS256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63520,7 +63422,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63535,7 +63437,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPS512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63547,7 +63449,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63563,7 +63465,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPSMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63575,7 +63477,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63591,7 +63493,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPSMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63603,7 +63505,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63619,7 +63521,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VREDUCEPSMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63630,7 +63532,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63645,7 +63547,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPD128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63656,7 +63558,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63671,7 +63573,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPD256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63682,7 +63584,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63697,7 +63599,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63709,7 +63611,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63725,7 +63627,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63737,7 +63639,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63753,7 +63655,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63765,7 +63667,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63781,7 +63683,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63792,7 +63694,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63807,7 +63709,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPS128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63818,7 +63720,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63833,7 +63735,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPS256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63844,7 +63746,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)
+ // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63859,7 +63761,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPS512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
@@ -63871,7 +63773,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63887,7 +63789,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPSMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63899,7 +63801,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63915,7 +63817,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPSMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -63927,7 +63829,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool {
v_0 := v.Args[0]
// match: (VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem)
+ // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
l := v_0
@@ -63943,7 +63845,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool {
break
}
v.reset(OpAMD64VRNDSCALEPSMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(ptr, mask, mem)
return true
@@ -64553,7 +64455,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool {
v_0 := v.Args[0]
// match: (VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -64569,7 +64471,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool {
break
}
v.reset(OpAMD64VSHUFPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -64581,7 +64483,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool {
v_0 := v.Args[0]
// match: (VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem))
// cond: canMergeLoad(v, l) && clobber(l)
- // result: (VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
+ // result: (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
x := v_0
@@ -64597,7 +64499,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool {
break
}
v.reset(OpAMD64VSHUFPS512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off))
v.Aux = symToAux(sym)
v.AddArg3(x, ptr, mem)
return true
@@ -68826,13 +68728,11 @@ func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool {
func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask16x16to16 <t> x)
- // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
+ // match: (CvtMask16x16to16 x)
+ // result: (KMOVWi (VPMOVVec16x16ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVWi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
@@ -68842,13 +68742,11 @@ func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask16x32to32 <t> x)
- // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
+ // match: (CvtMask16x32to32 x)
+ // result: (KMOVDi (VPMOVVec16x32ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVDi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
@@ -68858,13 +68756,11 @@ func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask16x8to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
+ // match: (CvtMask16x8to8 x)
+ // result: (KMOVBi (VPMOVVec16x8ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVBi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
@@ -68874,141 +68770,39 @@ func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask32x16to16 <t> x)
- // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
+ // match: (CvtMask32x16to16 x)
+ // result: (KMOVWi (VPMOVVec32x16ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVWi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
-func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask32x4to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVBi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask32x8to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVBi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask64x2to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVBi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask64x4to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVBi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask64x8to8 <t> x)
- // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
+ // match: (CvtMask64x8to8 x)
+ // result: (KMOVBi (VPMOVVec64x8ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVBi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
return true
}
}
-func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask8x16to16 <t> x)
- // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVWi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool {
- v_0 := v.Args[0]
- b := v.Block
- // match: (CvtMask8x32to32 <t> x)
- // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
- for {
- t := v.Type
- x := v_0
- v.reset(OpAMD64KMOVDi)
- v.Type = t
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
- v0.AddArg(x)
- v.AddArg(v0)
- return true
- }
-}
func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
- // match: (CvtMask8x64to64 <t> x)
- // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
+ // match: (CvtMask8x64to64 x)
+ // result: (KMOVQi (VPMOVVec8x64ToM <types.TypeMask> x))
for {
- t := v.Type
x := v_0
v.reset(OpAMD64KMOVQi)
- v.Type = t
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
v0.AddArg(x)
v.AddArg(v0)
@@ -71229,94 +71023,82 @@ func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat32x16 x y)
- // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+ // match: (IsNaNFloat32x16 x)
+ // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x4 x y)
- // result: (VCMPPS128 [3] x y)
+ // match: (IsNaNFloat32x4 x)
+ // result: (VCMPPS128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x8 x y)
- // result: (VCMPPS256 [3] x y)
+ // match: (IsNaNFloat32x8 x)
+ // result: (VCMPPS256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x2 x y)
- // result: (VCMPPD128 [3] x y)
+ // match: (IsNaNFloat64x2 x)
+ // result: (VCMPPD128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x4 x y)
- // result: (VCMPPD256 [3] x y)
+ // match: (IsNaNFloat64x4 x)
+ // result: (VCMPPD256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat64x8 x y)
- // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+ // match: (IsNaNFloat64x8 x)
+ // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
diff --git a/src/cmd/compile/internal/ssa/sccp.go b/src/cmd/compile/internal/ssa/sccp.go
index 9b958d0454..7ef8d6b7c1 100644
--- a/src/cmd/compile/internal/ssa/sccp.go
+++ b/src/cmd/compile/internal/ssa/sccp.go
@@ -507,6 +507,10 @@ func (t *worklist) propagate(block *Block) {
branchIdx = 1 - condLattice.val.AuxInt
} else {
branchIdx = condLattice.val.AuxInt
+ if branchIdx < 0 || branchIdx >= int64(len(block.Succs)) {
+ // The constant condition selects a successor index that does not exist,
+ // so this block is unreachable dead code; propagate no edge.
+ break
+ }
}
t.edges = append(t.edges, block.Succs[branchIdx])
} else {
diff --git a/src/cmd/compile/internal/ssa/tern_helpers.go b/src/cmd/compile/internal/ssa/tern_helpers.go
index 3ffc980c33..923a9f505e 100644
--- a/src/cmd/compile/internal/ssa/tern_helpers.go
+++ b/src/cmd/compile/internal/ssa/tern_helpers.go
@@ -1,4 +1,4 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
package ssa
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 4425c5617b..e2eebd783d 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1667,6 +1667,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64)
// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 7eb5456994..4ad0c6032c 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
package ssagen
@@ -69,19 +69,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.AddPairs", opLen2(ssa.OpAddPairsFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.AddPairs", opLen2(ssa.OpAddPairsInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.AddPairs", opLen2(ssa.OpAddPairsInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.AddPairs", opLen2(ssa.OpAddPairsInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.AddPairs", opLen2(ssa.OpAddPairsInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.AddPairs", opLen2(ssa.OpAddPairsUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.AddPairs", opLen2(ssa.OpAddPairsUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.AddPairs", opLen2(ssa.OpAddPairsUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.AddPairs", opLen2(ssa.OpAddPairsUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x4.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.AddPairsSaturatedGrouped", opLen2(ssa.OpAddPairsSaturatedGroupedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x64, types.TypeVec512), sys.AMD64)
@@ -328,12 +328,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x32.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x64.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
@@ -394,26 +388,26 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x4.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x4.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt16x8", opLen1(ssa.OpExtendLo8ToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt32x8", opLen1(ssa.OpExtendLo8ToInt32x8Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt64x8", opLen1(ssa.OpExtendLo8ToInt64x8Int8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint16x8", opLen1(ssa.OpExtendLo8ToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint32x8", opLen1(ssa.OpExtendLo8ToUint32x8Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint64x8", opLen1(ssa.OpExtendLo8ToUint64x8Uint8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt16", opLen1(ssa.OpExtendLo8ToInt16Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt32", opLen1(ssa.OpExtendLo8ToInt32Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt64", opLen1(ssa.OpExtendLo8ToInt64Int8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint16", opLen1(ssa.OpExtendLo8ToUint16Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint32", opLen1(ssa.OpExtendLo8ToUint32Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint64", opLen1(ssa.OpExtendLo8ToUint64Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x32.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int8x16, types.TypeVec512), sys.AMD64)
@@ -577,12 +571,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64)
@@ -926,29 +914,29 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int64x4.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x8.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x4.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int32x16.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x8.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x8.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x32.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x16.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x2.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint32x16.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatInt32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x8.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x8.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x8, types.TypeVec256), sys.AMD64)
@@ -1199,19 +1187,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.SubPairs", opLen2(ssa.OpSubPairsFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.SubPairs", opLen2(ssa.OpSubPairsInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SubPairs", opLen2(ssa.OpSubPairsInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x4.SubPairs", opLen2(ssa.OpSubPairsInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x8.SubPairs", opLen2(ssa.OpSubPairsInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint16x8.SubPairs", opLen2(ssa.OpSubPairsUint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x16.SubPairs", opLen2(ssa.OpSubPairsUint16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x4.SubPairs", opLen2(ssa.OpSubPairsUint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x8.SubPairs", opLen2(ssa.OpSubPairsUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x4.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x16.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int16x8.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x16.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x16.SubPairsSaturatedGrouped", opLen2(ssa.OpSubPairsSaturatedGroupedInt16x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x64, types.TypeVec512), sys.AMD64)
diff --git a/src/cmd/compile/testdata/script/issue77033.txt b/src/cmd/compile/testdata/script/issue77033.txt
new file mode 100644
index 0000000000..3b977e5440
--- /dev/null
+++ b/src/cmd/compile/testdata/script/issue77033.txt
@@ -0,0 +1,40 @@
+go test -bench=Foo -cpuprofile=default.pgo
+go test -bench=Foo -pgo=default.pgo
+! stdout 'FAIL'
+
+-- main_test.go --
+package main
+
+import (
+ "testing"
+)
+
+var a int
+
+func save(x int) {
+ a = x
+}
+
+func foo() {
+ for i := range yield1 {
+ defer save(i)
+ }
+}
+
+func yield1(yield func(int) bool) {
+ yield(1)
+}
+
+func BenchmarkFoo(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ foo()
+ }
+ if a != 1 {
+ b.Fatalf("a = %d; want 1", a)
+ }
+}
+
+-- go.mod --
+module demo
+
+go 1.24
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 6d3742525c..48c3aa5efd 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -748,7 +748,7 @@ func (t *tester) registerTests() {
if !strings.Contains(goexperiment, "jsonv2") {
t.registerTest("GOEXPERIMENT=jsonv2 go test encoding/json/...", &goTest{
variant: "jsonv2",
- env: []string{"GOEXPERIMENT=jsonv2"},
+ env: []string{"GOEXPERIMENT=" + goexperiments("jsonv2")},
pkg: "encoding/json/...",
})
}
@@ -757,7 +757,7 @@ func (t *tester) registerTests() {
if !strings.Contains(goexperiment, "runtimesecret") {
t.registerTest("GOEXPERIMENT=runtimesecret go test runtime/secret/...", &goTest{
variant: "runtimesecret",
- env: []string{"GOEXPERIMENT=runtimesecret"},
+ env: []string{"GOEXPERIMENT=" + goexperiments("runtimesecret")},
pkg: "runtime/secret/...",
})
}
@@ -766,7 +766,7 @@ func (t *tester) registerTests() {
if goarch == "amd64" && !strings.Contains(goexperiment, "simd") {
t.registerTest("GOEXPERIMENT=simd go test simd/archsimd/...", &goTest{
variant: "simd",
- env: []string{"GOEXPERIMENT=simd"},
+ env: []string{"GOEXPERIMENT=" + goexperiments("simd")},
pkg: "simd/archsimd/...",
})
}
@@ -1888,3 +1888,19 @@ func fipsVersions(short bool) []string {
}
return versions
}
+
+// goexperiments returns the GOEXPERIMENT value to use
+// when running a test with the given experiments enabled.
+//
+// It preserves any experiments already present in the GOEXPERIMENT setting.
+func goexperiments(exps ...string) string {
+ if len(exps) == 0 {
+ return goexperiment
+ }
+ existing := goexperiment
+ if existing != "" {
+ existing += ","
+ }
+ return existing + strings.Join(exps, ",")
+
+}
diff --git a/src/cmd/go.mod b/src/cmd/go.mod
index c7d3cc6136..85e8c4cb5f 100644
--- a/src/cmd/go.mod
+++ b/src/cmd/go.mod
@@ -11,7 +11,7 @@ require (
golang.org/x/sys v0.39.0
golang.org/x/telemetry v0.0.0-20251128220624-abf20d0e57ec
golang.org/x/term v0.38.0
- golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2
+ golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c
)
require (
diff --git a/src/cmd/go.sum b/src/cmd/go.sum
index b02c469a41..61c88e5253 100644
--- a/src/cmd/go.sum
+++ b/src/cmd/go.sum
@@ -22,7 +22,7 @@ golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q=
golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
-golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2 h1:2Qqv605Nus9iUp3ErvEU/q92Q3HAzeROztzl9pzAno8=
-golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
+golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c h1:0pZej6BQOooNbOfjJEu4v5qx9hdwFX8HnvHCcNXcs2w=
+golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef h1:mqLYrXCXYEZOop9/Dbo6RPX11539nwiCNBb1icVPmw8=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef/go.mod h1:8xcPgWmwlZONN1D9bjxtHEjrUtSEa3fakVF8iaewYKQ=
diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go
index fe9b862073..8c346dafdb 100644
--- a/src/cmd/go/alldocs.go
+++ b/src/cmd/go/alldocs.go
@@ -1954,7 +1954,7 @@
//
// -o file
// Save a copy of the test binary to the named file.
-// The test still runs (unless -c or -i is specified).
+// The test still runs (unless -c is specified).
// If file ends in a slash or names an existing directory,
// the test is written to pkg.test in that directory.
//
diff --git a/src/cmd/go/internal/doc/pkgsite.go b/src/cmd/go/internal/doc/pkgsite.go
index c173167b63..dc344cbbca 100644
--- a/src/cmd/go/internal/doc/pkgsite.go
+++ b/src/cmd/go/internal/doc/pkgsite.go
@@ -71,7 +71,7 @@ func doPkgsite(urlPath, fragment string) error {
env = append(env, "GOPROXY="+gomodcache+","+goproxy)
}
- const version = "v0.0.0-20250714212547-01b046e81fe7"
+ const version = "v0.0.0-20251223195805-1a3bd3c788fe"
cmd := exec.Command(goCmd(), "run", "golang.org/x/pkgsite/cmd/internal/doc@"+version,
"-gorepo", buildCtx.GOROOT,
"-http", addr,
diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go
index af2c0abe04..beded695bf 100644
--- a/src/cmd/go/internal/modindex/scan.go
+++ b/src/cmd/go/internal/modindex/scan.go
@@ -112,10 +112,10 @@ func parseErrorToString(err error) string {
return ""
}
var p parseError
- if e, ok := err.(scanner.ErrorList); ok {
- p.ErrorList = &e
+ if errlist, ok := err.(scanner.ErrorList); ok {
+ p.ErrorList = &errlist
} else {
- p.ErrorString = e.Error()
+ p.ErrorString = err.Error()
}
s, err := json.Marshal(p)
if err != nil {
diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go
index 916943904d..9309aa65ed 100644
--- a/src/cmd/go/internal/test/test.go
+++ b/src/cmd/go/internal/test/test.go
@@ -163,7 +163,7 @@ In addition to the build flags, the flags handled by 'go test' itself are:
-o file
Save a copy of the test binary to the named file.
- The test still runs (unless -c or -i is specified).
+ The test still runs (unless -c is specified).
If file ends in a slash or names an existing directory,
the test is written to pkg.test in that directory.
diff --git a/src/cmd/go/testdata/script/list_empty_importpath.txt b/src/cmd/go/testdata/script/list_empty_importpath.txt
index fe4210322b..0960a7795d 100644
--- a/src/cmd/go/testdata/script/list_empty_importpath.txt
+++ b/src/cmd/go/testdata/script/list_empty_importpath.txt
@@ -1,15 +1,6 @@
! go list all
! stderr 'panic'
-[!GOOS:windows] [!GOOS:solaris] [!GOOS:freebsd] [!GOOS:openbsd] [!GOOS:netbsd] stderr 'invalid import path'
-# #73976: Allow 'no errors' on Windows, Solaris, and BSD until issue
-# is resolved to prevent flakes. 'no errors' is printed by
-# empty scanner.ErrorList errors so that's probably where the
-# message is coming from, though we don't know how.
-[GOOS:windows] stderr 'invalid import path|no errors'
-[GOOS:solaris] stderr 'invalid import path|no errors'
-[GOOS:freebsd] stderr 'invalid import path|no errors'
-[GOOS:openbsd] stderr 'invalid import path|no errors'
-[GOOS:netbsd] stderr 'invalid import path|no errors'
+stderr 'invalid import path'
# go list produces a package for 'p' but not for ''
go list -e all
diff --git a/src/cmd/go/testdata/vcstest/git/legacytest.txt b/src/cmd/go/testdata/vcstest/git/legacytest.txt
index 5846983cef..6465242d62 100644
--- a/src/cmd/go/testdata/vcstest/git/legacytest.txt
+++ b/src/cmd/go/testdata/vcstest/git/legacytest.txt
@@ -6,7 +6,7 @@ env GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME
env GIT_COMMITTER_EMAIL=$GIT_AUTHOR_EMAIL
git init
-git branch -M master
+git checkout -b master
at 2018-07-17T12:41:39-04:00
cp x_cf92c7b.go x.go
diff --git a/src/cmd/internal/bootstrap_test/overlaydir_test.go b/src/cmd/internal/bootstrap_test/overlaydir_test.go
index 5812c453ac..bee3214b67 100644
--- a/src/cmd/internal/bootstrap_test/overlaydir_test.go
+++ b/src/cmd/internal/bootstrap_test/overlaydir_test.go
@@ -43,6 +43,9 @@ func overlayDir(dstRoot, srcRoot string) error {
dstPath := filepath.Join(dstRoot, suffix)
info, err := entry.Info()
+ if err != nil {
+ return err
+ }
perm := info.Mode() & os.ModePerm
if info.Mode()&os.ModeSymlink != 0 {
info, err = os.Stat(srcPath)
diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
index bc7504e5b1..036eda13bc 100644
--- a/src/cmd/link/link_test.go
+++ b/src/cmd/link/link_test.go
@@ -869,6 +869,9 @@ func TestFuncAlignOption(t *testing.T) {
"_main.bar": false,
"_main.baz": false}
syms, err := f.Symbols()
+ if err != nil {
+ t.Errorf("failed to get symbols with err %v", err)
+ }
for _, s := range syms {
fn := s.Name
if _, ok := fname[fn]; !ok {
diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go
index 45aed7909c..f1202c7a11 100644
--- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go
+++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go
@@ -80,6 +80,8 @@ or b.ResetTimer within the same function will also be removed.
Caveats: The b.Loop() method is designed to prevent the compiler from
optimizing away the benchmark loop, which can occasionally result in
slower execution due to increased allocations in some specific cases.
+Since its fix may change the performance of nanosecond-scale benchmarks,
+bloop is disabled by default in the `go fix` analyzer suite; see golang/go#74967.
# Analyzer any
diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go
index f97541d4b3..795f5b6c6b 100644
--- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go
+++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go
@@ -231,9 +231,28 @@ func mapsloop(pass *analysis.Pass) (any, error) {
// Have: for k, v := range x { lhs = rhs }
assign := rng.Body.List[0].(*ast.AssignStmt)
+
+ // usesKV reports whether e references vars k or v.
+ usesKV := func(e ast.Expr) bool {
+ k := info.Defs[rng.Key.(*ast.Ident)]
+ v := info.Defs[rng.Value.(*ast.Ident)]
+ for n := range ast.Preorder(e) {
+ if id, ok := n.(*ast.Ident); ok {
+ obj := info.Uses[id]
+ if obj != nil && // don't rely on k, v being non-nil
+ (obj == k || obj == v) {
+ return true
+ }
+ }
+ }
+ return false
+ }
+
if index, ok := assign.Lhs[0].(*ast.IndexExpr); ok &&
+ len(assign.Lhs) == 1 &&
astutil.EqualSyntax(rng.Key, index.Index) &&
- astutil.EqualSyntax(rng.Value, assign.Rhs[0]) {
+ astutil.EqualSyntax(rng.Value, assign.Rhs[0]) &&
+ !usesKV(index.X) { // reject (e.g.) f(k, v)[k] = v
if tmap, ok := typeparams.CoreType(info.TypeOf(index.X)).(*types.Map); ok &&
types.Identical(info.TypeOf(index), info.TypeOf(rng.Value)) && // m[k], v
types.Identical(tmap.Key(), info.TypeOf(rng.Key)) {
diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go
index 013ce79d6c..f09a2d26ca 100644
--- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go
+++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go
@@ -34,7 +34,7 @@ var doc string
var Suite = []*analysis.Analyzer{
AnyAnalyzer,
// AppendClippedAnalyzer, // not nil-preserving!
- BLoopAnalyzer,
+ // BLoopAnalyzer, // may skew benchmark results, see golang/go#74967
FmtAppendfAnalyzer,
ForVarAnalyzer,
MapsLoopAnalyzer,
diff --git a/src/cmd/vendor/modules.txt b/src/cmd/vendor/modules.txt
index 7c122cd9d1..9c179c4bcd 100644
--- a/src/cmd/vendor/modules.txt
+++ b/src/cmd/vendor/modules.txt
@@ -73,7 +73,7 @@ golang.org/x/text/internal/tag
golang.org/x/text/language
golang.org/x/text/transform
golang.org/x/text/unicode/norm
-# golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2
+# golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c
## explicit; go 1.24.0
golang.org/x/tools/cmd/bisect
golang.org/x/tools/cover
diff --git a/src/crypto/cipher/gcm_fips140v2.0_test.go b/src/crypto/cipher/gcm_fips140v1.26_test.go
index d3a8ea5c63..9f17a497ca 100644
--- a/src/crypto/cipher/gcm_fips140v2.0_test.go
+++ b/src/crypto/cipher/gcm_fips140v1.26_test.go
@@ -18,10 +18,10 @@ import (
"testing"
)
-func TestGCMNoncesFIPSV2(t *testing.T) {
+func TestGCMNoncesFIPSV126(t *testing.T) {
cryptotest.MustSupportFIPS140(t)
if !fips140.Enabled {
- cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestGCMNoncesFIPSV2$", "-test.v")
+ cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestGCMNoncesFIPSV126$", "-test.v")
cmd.Env = append(cmd.Environ(), "GODEBUG=fips140=on")
out, err := cmd.CombinedOutput()
t.Logf("running with GODEBUG=fips140=on:\n%s", out)
diff --git a/src/crypto/hpke/aead_fipsv1.0.go b/src/crypto/hpke/aead_fips140v1.0.go
index 986126cbf9..986126cbf9 100644
--- a/src/crypto/hpke/aead_fipsv1.0.go
+++ b/src/crypto/hpke/aead_fips140v1.0.go
diff --git a/src/crypto/hpke/aead_fipsv2.0.go b/src/crypto/hpke/aead_fips140v1.26.go
index 710eb1c08f..710eb1c08f 100644
--- a/src/crypto/hpke/aead_fipsv2.0.go
+++ b/src/crypto/hpke/aead_fips140v1.26.go
diff --git a/src/crypto/internal/fips140only/fips140only_test.go b/src/crypto/internal/fips140only/fips140only_test.go
new file mode 100644
index 0000000000..96df536d56
--- /dev/null
+++ b/src/crypto/internal/fips140only/fips140only_test.go
@@ -0,0 +1,408 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fips140only_test
+
+import (
+ "crypto"
+ "crypto/aes"
+ "crypto/cipher"
+ "crypto/des"
+ "crypto/dsa"
+ "crypto/ecdh"
+ "crypto/ecdsa"
+ "crypto/ed25519"
+ "crypto/elliptic"
+ "crypto/hkdf"
+ "crypto/hmac"
+ "crypto/hpke"
+ "crypto/internal/cryptotest"
+ "crypto/internal/fips140"
+ "crypto/internal/fips140only"
+ "crypto/md5"
+ "crypto/mlkem"
+ "crypto/mlkem/mlkemtest"
+ "crypto/pbkdf2"
+ "crypto/rand"
+ "crypto/rc4"
+ "crypto/rsa"
+ "crypto/sha1"
+ "crypto/sha256"
+ _ "crypto/sha3"
+ _ "crypto/sha512"
+ "crypto/x509"
+ "encoding/pem"
+ "fmt"
+ "internal/godebug"
+ "internal/testenv"
+ "io"
+ "math/big"
+ "os"
+ "strings"
+ "testing"
+
+ "golang.org/x/crypto/chacha20poly1305"
+)
+
+func TestFIPS140Only(t *testing.T) {
+ cryptotest.MustSupportFIPS140(t)
+ if !fips140only.Enforced() {
+ cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestFIPS140Only$", "-test.v")
+ cmd.Env = append(cmd.Environ(), "GODEBUG=fips140=only")
+ out, err := cmd.CombinedOutput()
+ t.Logf("running with GODEBUG=fips140=only:\n%s", out)
+ if err != nil {
+ t.Errorf("fips140=only subprocess failed: %v", err)
+ }
+ return
+ }
+ t.Run("cryptocustomrand=0", func(t *testing.T) {
+ t.Setenv("GODEBUG", os.Getenv("GODEBUG")+",cryptocustomrand=0")
+ testFIPS140Only(t)
+ })
+ t.Run("cryptocustomrand=1", func(t *testing.T) {
+ t.Setenv("GODEBUG", os.Getenv("GODEBUG")+",cryptocustomrand=1")
+ testFIPS140Only(t)
+ })
+}
+
+func testFIPS140Only(t *testing.T) {
+ if !fips140only.Enforced() {
+ t.Fatal("FIPS 140-only mode not enforced")
+ }
+ t.Logf("GODEBUG=fips140=only enabled")
+ fips140.ResetServiceIndicator()
+
+ aesBlock, err := aes.NewCipher(make([]byte, 16))
+ if err != nil {
+ t.Fatal(err)
+ }
+ notAESBlock := blockWrap{aesBlock}
+ iv := make([]byte, aes.BlockSize)
+
+ cipher.NewCBCEncrypter(aesBlock, iv)
+ expectPanic(t, func() { cipher.NewCBCEncrypter(notAESBlock, iv) })
+ cipher.NewCBCDecrypter(aesBlock, iv)
+ expectPanic(t, func() { cipher.NewCBCDecrypter(notAESBlock, iv) })
+
+ expectPanic(t, func() { cipher.NewCFBEncrypter(aesBlock, iv) })
+ expectPanic(t, func() { cipher.NewCFBDecrypter(aesBlock, iv) })
+
+ cipher.NewCTR(aesBlock, iv)
+ expectPanic(t, func() { cipher.NewCTR(notAESBlock, iv) })
+
+ expectPanic(t, func() { cipher.NewOFB(aesBlock, iv) })
+
+ expectErr(t, errRet2(cipher.NewGCM(aesBlock)))
+ expectErr(t, errRet2(cipher.NewGCMWithNonceSize(aesBlock, 12)))
+ expectErr(t, errRet2(cipher.NewGCMWithTagSize(aesBlock, 12)))
+ expectNoErr(t, errRet2(cipher.NewGCMWithRandomNonce(aesBlock)))
+
+ expectErr(t, errRet2(des.NewCipher(make([]byte, 8))))
+ expectErr(t, errRet2(des.NewTripleDESCipher(make([]byte, 24))))
+
+ expectErr(t, errRet2(rc4.NewCipher(make([]byte, 16))))
+
+ expectErr(t, errRet2(chacha20poly1305.New(make([]byte, chacha20poly1305.KeySize))))
+ expectErr(t, errRet2(chacha20poly1305.NewX(make([]byte, chacha20poly1305.KeySize))))
+
+ expectPanic(t, func() { md5.New().Sum(nil) })
+ expectErr(t, errRet2(md5.New().Write(make([]byte, 16))))
+ expectPanic(t, func() { md5.Sum([]byte("foo")) })
+
+ expectPanic(t, func() { sha1.New().Sum(nil) })
+ expectErr(t, errRet2(sha1.New().Write(make([]byte, 16))))
+ expectPanic(t, func() { sha1.Sum([]byte("foo")) })
+
+ withApprovedHash(func(h crypto.Hash) { h.New().Sum(nil) })
+ withNonApprovedHash(func(h crypto.Hash) { expectPanic(t, func() { h.New().Sum(nil) }) })
+
+ expectErr(t, errRet2(pbkdf2.Key(sha256.New, "password", make([]byte, 16), 1, 10)))
+ expectErr(t, errRet2(pbkdf2.Key(sha256.New, "password", make([]byte, 10), 1, 14)))
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectErr(t, errRet2(pbkdf2.Key(h.New, "password", make([]byte, 16), 1, 14)))
+ })
+ withApprovedHash(func(h crypto.Hash) {
+ expectNoErr(t, errRet2(pbkdf2.Key(h.New, "password", make([]byte, 16), 1, 14)))
+ })
+
+ expectPanic(t, func() { hmac.New(sha256.New, make([]byte, 10)) })
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectPanic(t, func() { hmac.New(h.New, make([]byte, 16)) })
+ })
+ withApprovedHash(func(h crypto.Hash) { hmac.New(h.New, make([]byte, 16)) })
+
+ expectErr(t, errRet2(hkdf.Key(sha256.New, make([]byte, 10), nil, "", 16)))
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectErr(t, errRet2(hkdf.Key(h.New, make([]byte, 16), nil, "", 16)))
+ })
+ withApprovedHash(func(h crypto.Hash) {
+ expectNoErr(t, errRet2(hkdf.Key(h.New, make([]byte, 16), nil, "", 16)))
+ })
+
+ expectErr(t, errRet2(hkdf.Extract(sha256.New, make([]byte, 10), nil)))
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectErr(t, errRet2(hkdf.Extract(h.New, make([]byte, 16), nil)))
+ })
+ withApprovedHash(func(h crypto.Hash) {
+ expectNoErr(t, errRet2(hkdf.Extract(h.New, make([]byte, 16), nil)))
+ })
+
+ expectErr(t, errRet2(hkdf.Expand(sha256.New, make([]byte, 10), "", 16)))
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectErr(t, errRet2(hkdf.Expand(h.New, make([]byte, 16), "", 16)))
+ })
+ withApprovedHash(func(h crypto.Hash) {
+ expectNoErr(t, errRet2(hkdf.Expand(h.New, make([]byte, 16), "", 16)))
+ })
+
+ expectErr(t, errRet2(rand.Prime(rand.Reader, 10)))
+
+ expectErr(t, dsa.GenerateParameters(&dsa.Parameters{}, rand.Reader, dsa.L1024N160))
+ expectErr(t, dsa.GenerateKey(&dsa.PrivateKey{}, rand.Reader))
+ expectErr(t, errRet3(dsa.Sign(rand.Reader, &dsa.PrivateKey{}, make([]byte, 16))))
+ expectPanic(t, func() {
+ dsa.Verify(&dsa.PublicKey{}, make([]byte, 16), big.NewInt(1), big.NewInt(1))
+ })
+
+ expectErr(t, errRet2(ecdh.X25519().GenerateKey(rand.Reader)))
+ expectErr(t, errRet2(ecdh.X25519().NewPrivateKey(make([]byte, 32))))
+ expectErr(t, errRet2(ecdh.X25519().NewPublicKey(make([]byte, 32))))
+ for _, curve := range []ecdh.Curve{ecdh.P256(), ecdh.P384(), ecdh.P521()} {
+ expectErrIfCustomRand(t, errRet2(curve.GenerateKey(readerWrap{rand.Reader})))
+ k, err := curve.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expectNoErr(t, errRet2(curve.NewPrivateKey(k.Bytes())))
+ expectNoErr(t, errRet2(curve.NewPublicKey(k.PublicKey().Bytes())))
+ }
+
+ for _, curve := range []elliptic.Curve{elliptic.P256(), elliptic.P384(), elliptic.P521()} {
+ expectErrIfCustomRand(t, errRet2(ecdsa.GenerateKey(curve, readerWrap{rand.Reader})))
+ k, err := ecdsa.GenerateKey(curve, rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ expectErrIfCustomRand(t, errRet2(k.Sign(readerWrap{rand.Reader}, make([]byte, 32), nil)))
+ expectErrIfCustomRand(t, errRet2(ecdsa.SignASN1(readerWrap{rand.Reader}, k, make([]byte, 32))))
+ expectErrIfCustomRand(t, errRet3(ecdsa.Sign(readerWrap{rand.Reader}, k, make([]byte, 32))))
+ expectNoErr(t, errRet2(k.Sign(rand.Reader, make([]byte, 32), nil)))
+ expectNoErr(t, errRet2(ecdsa.SignASN1(rand.Reader, k, make([]byte, 32))))
+ expectNoErr(t, errRet3(ecdsa.Sign(rand.Reader, k, make([]byte, 32))))
+
+ withNonApprovedHash(func(h crypto.Hash) {
+ expectErr(t, errRet2(k.Sign(nil, make([]byte, h.Size()), h)))
+ })
+ withApprovedHash(func(h crypto.Hash) {
+ expectNoErr(t, errRet2(k.Sign(nil, make([]byte, h.Size()), h)))
+ })
+ }
+ customCurve := &elliptic.CurveParams{Name: "custom", P: big.NewInt(1)}
+ expectErr(t, errRet2(ecdsa.GenerateKey(customCurve, rand.Reader)))
+
+ _, ed25519Key, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 32), crypto.Hash(0))))
+ expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), crypto.SHA512)))
+ // ed25519ctx is not allowed (but ed25519ph with context is).
+ expectErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 32), &ed25519.Options{
+ Context: "test",
+ })))
+ expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), &ed25519.Options{
+ Hash: crypto.SHA512, Context: "test",
+ })))
+ expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), &ed25519.Options{
+ Hash: crypto.SHA512,
+ })))
+
+ expectErr(t, errRet2(rsa.GenerateMultiPrimeKey(rand.Reader, 3, 2048)))
+ expectErr(t, errRet2(rsa.GenerateKey(rand.Reader, 1024)))
+ expectErr(t, errRet2(rsa.GenerateKey(rand.Reader, 2049)))
+ expectErrIfCustomRand(t, errRet2(rsa.GenerateKey(readerWrap{rand.Reader}, 2048)))
+ rsaKey, err := rsa.GenerateKey(rand.Reader, 2048)
+ expectNoErr(t, err)
+
+ smallKey := parseKey(testingKey(`-----BEGIN RSA TESTING KEY-----
+MIICXQIBAAKBgQDMrln6XoAa3Rjts+kRi5obbP86qSf/562RcuDO+yMXeTLHfi4M
+8ubyhoFY+UKBCGBLmmTO7ikbvQgdipkT3xVkU8nM3XTW4sxrnw0X5QXsl4PGlMo0
+5UufxYyQxe7bbjuwFz2XnN6Jz4orpOfO0s36/KVHj9lZRl+REpr/Jy+nJQIDAQAB
+AoGAJ9WEwGO01cWSzOwXH2mGX/EKCQ4TsUuS7XwogU/B6BcXyVhmuPFq/ecsdDbq
+ePc62mvdU6JpELNsyWcIXKQtYsRgJHxNS+KJkCQIq6YeiAWRG0XL6q+qVj+HtT8a
+1Qrmul9ZBd23Y9wLF8pg/xWDQYvb8DPAb/xJ0e/KEBZcWU8CQQDXFCFCGpCfwyxY
+Cq8G/3B94D9UYwk5mK6jRIH5m8LbaX9bKKetf8+If8TWVgeuiRjjN4WEQ78lPoSg
+3Fsz2qs3AkEA85/JCudNUf2FnY+T6h1c/2SWekZiZ1NS4lCh/C7iYuAN3oa8zGkf
+gjjR5e0+Z8rUAcZkTukxyLLaNqy6rs9GgwJAVR6pXvEGhcQHe7yWso1LpvWl+q7L
+StkrXIBTdEb54j4pYhl/6wFnUB1I+I7JsYCeseYaWFM7hfDtKoCrM6V6FwJBANxh
+KmfmnJcSkw/YlaEuNrYAs+6gRNvbEBsRfba2Yqu2qlUl5Ruz7IDMDXPEjLMvU2DX
+ql2HrTU0NRlIXwdLESkCQQDGJ54H6WK1eE1YvtxCaLm28zmogcFlvc21pym+PpM1
+bXVL8iKLrG91IYQByUHZIn3WVAd2bfi4MfKagRt0ggd4
+-----END RSA TESTING KEY-----`))
+
+ expectNoErr(t, errRet2(rsaKey.Sign(rand.Reader, make([]byte, 32), crypto.SHA256)))
+ expectErr(t, errRet2(smallKey.Sign(rand.Reader, make([]byte, 32), crypto.SHA256)))
+ expectErr(t, errRet2(rsaKey.Sign(rand.Reader, make([]byte, 20), crypto.SHA1)))
+ // rand is always ignored for PKCS1v15 signing
+ expectNoErr(t, errRet2(rsaKey.Sign(readerWrap{rand.Reader}, make([]byte, 32), crypto.SHA256)))
+
+ sigPKCS1v15, err := rsa.SignPKCS1v15(rand.Reader, rsaKey, crypto.SHA256, make([]byte, 32))
+ expectNoErr(t, err)
+ expectErr(t, errRet2(rsa.SignPKCS1v15(rand.Reader, smallKey, crypto.SHA256, make([]byte, 32))))
+ expectErr(t, errRet2(rsa.SignPKCS1v15(rand.Reader, rsaKey, crypto.SHA1, make([]byte, 20))))
+ // rand is always ignored for PKCS1v15 signing
+ expectNoErr(t, errRet2(rsa.SignPKCS1v15(readerWrap{rand.Reader}, rsaKey, crypto.SHA256, make([]byte, 32))))
+
+ expectNoErr(t, rsa.VerifyPKCS1v15(&rsaKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPKCS1v15))
+ expectErr(t, rsa.VerifyPKCS1v15(&smallKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPKCS1v15))
+ expectErr(t, rsa.VerifyPKCS1v15(&rsaKey.PublicKey, crypto.SHA1, make([]byte, 20), sigPKCS1v15))
+
+ sigPSS, err := rsa.SignPSS(rand.Reader, rsaKey, crypto.SHA256, make([]byte, 32), nil)
+ expectNoErr(t, err)
+ expectErr(t, errRet2(rsa.SignPSS(rand.Reader, smallKey, crypto.SHA256, make([]byte, 32), nil)))
+ expectErr(t, errRet2(rsa.SignPSS(rand.Reader, rsaKey, crypto.SHA1, make([]byte, 20), nil)))
+ expectErr(t, errRet2(rsa.SignPSS(readerWrap{rand.Reader}, rsaKey, crypto.SHA256, make([]byte, 32), nil)))
+
+ expectNoErr(t, rsa.VerifyPSS(&rsaKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPSS, nil))
+ expectErr(t, rsa.VerifyPSS(&smallKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPSS, nil))
+ expectErr(t, rsa.VerifyPSS(&rsaKey.PublicKey, crypto.SHA1, make([]byte, 20), sigPSS, nil))
+
+ k, err := mlkem.GenerateKey768()
+ expectNoErr(t, err)
+ expectErr(t, errRet3(mlkemtest.Encapsulate768(k.EncapsulationKey(), make([]byte, 32))))
+ k1024, err := mlkem.GenerateKey1024()
+ expectNoErr(t, err)
+ expectErr(t, errRet3(mlkemtest.Encapsulate1024(k1024.EncapsulationKey(), make([]byte, 32))))
+
+ for _, kem := range []hpke.KEM{
+ hpke.DHKEM(ecdh.P256()),
+ hpke.DHKEM(ecdh.P384()),
+ hpke.DHKEM(ecdh.P521()),
+ hpke.MLKEM768(),
+ hpke.MLKEM1024(),
+ hpke.MLKEM768P256(),
+ hpke.MLKEM1024P384(),
+ hpke.MLKEM768X25519(), // allowed as hybrid
+ } {
+ t.Run(fmt.Sprintf("HPKE KEM %04x", kem.ID()), func(t *testing.T) {
+ k, err := kem.GenerateKey()
+ expectNoErr(t, err)
+ expectNoErr(t, errRet2(kem.DeriveKeyPair(make([]byte, 64))))
+ kb, err := k.Bytes()
+ expectNoErr(t, err)
+ expectNoErr(t, errRet2(kem.NewPrivateKey(kb)))
+ expectNoErr(t, errRet2(kem.NewPublicKey(k.PublicKey().Bytes())))
+ if fips140.Version() == "v1.0.0" {
+ t.Skip("FIPS 140-3 Module v1.0.0 does not provide HPKE GCM modes")
+ }
+ c, err := hpke.Seal(k.PublicKey(), hpke.HKDFSHA256(), hpke.AES128GCM(), nil, nil)
+ expectNoErr(t, err)
+ _, err = hpke.Open(k, hpke.HKDFSHA256(), hpke.AES128GCM(), nil, c)
+ expectNoErr(t, err)
+ })
+ }
+ expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).GenerateKey()))
+ expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).DeriveKeyPair(make([]byte, 64))))
+ expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).NewPrivateKey(make([]byte, 32))))
+ expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).NewPublicKey(make([]byte, 32))))
+ hpkeK, err := hpke.MLKEM768().GenerateKey()
+ expectNoErr(t, err)
+ expectErr(t, errRet2(hpke.Seal(hpkeK.PublicKey(), hpke.HKDFSHA256(), hpke.ChaCha20Poly1305(), nil, nil)))
+ expectErr(t, errRet2(hpke.Open(hpkeK, hpke.HKDFSHA256(), hpke.ChaCha20Poly1305(), nil, make([]byte, 2000))))
+
+ // fips140=only mode should prevent any operation that would make the FIPS
+ // 140-3 module set its service indicator to false.
+ if !fips140.ServiceIndicator() {
+ t.Errorf("service indicator not set")
+ }
+}
+
+type blockWrap struct {
+ cipher.Block
+}
+
+type readerWrap struct {
+ io.Reader
+}
+
+func withApprovedHash(f func(crypto.Hash)) {
+ f(crypto.SHA224)
+ f(crypto.SHA256)
+ f(crypto.SHA384)
+ f(crypto.SHA512)
+ f(crypto.SHA3_224)
+ f(crypto.SHA3_256)
+ f(crypto.SHA3_384)
+ f(crypto.SHA3_512)
+ f(crypto.SHA512_224)
+ f(crypto.SHA512_256)
+}
+
+func withNonApprovedHash(f func(crypto.Hash)) {
+ f(crypto.MD5)
+ f(crypto.SHA1)
+}
+
+func expectPanic(t *testing.T, f func()) {
+ t.Helper()
+ defer func() {
+ t.Helper()
+ if err := recover(); err == nil {
+ t.Errorf("expected panic")
+ } else {
+ if s, ok := err.(string); !ok || !strings.Contains(s, "FIPS 140-only") {
+ t.Errorf("unexpected panic: %v", err)
+ }
+ }
+ }()
+ f()
+}
+
+var cryptocustomrand = godebug.New("cryptocustomrand")
+
+func expectErr(t *testing.T, err error) {
+ t.Helper()
+ if err == nil {
+ t.Errorf("expected error")
+ } else if !strings.Contains(err.Error(), "FIPS 140-only") {
+ t.Errorf("unexpected error: %v", err)
+ }
+}
+
+func expectNoErr(t *testing.T, err error) {
+ t.Helper()
+ if err != nil {
+ t.Errorf("unexpected error: %v", err)
+ }
+}
+
+func expectErrIfCustomRand(t *testing.T, err error) {
+ t.Helper()
+ if cryptocustomrand.Value() == "1" {
+ expectErr(t, err)
+ } else {
+ expectNoErr(t, err)
+ }
+}
+
+func errRet2[T any](_ T, err error) error {
+ return err
+}
+
+func errRet3[T any](_, _ T, err error) error {
+ return err
+}
+
+func testingKey(s string) string { return strings.ReplaceAll(s, "TESTING KEY", "PRIVATE KEY") }
+
+func parseKey(s string) *rsa.PrivateKey {
+ p, _ := pem.Decode([]byte(s))
+ k, err := x509.ParsePKCS1PrivateKey(p.Bytes)
+ if err != nil {
+ panic(err)
+ }
+ return k
+}
diff --git a/src/crypto/internal/fips140test/acvp_capabilities_fips140v2.0.json b/src/crypto/internal/fips140test/acvp_capabilities_fips140v1.26.json
index 33c8aa235b..33c8aa235b 100644
--- a/src/crypto/internal/fips140test/acvp_capabilities_fips140v2.0.json
+++ b/src/crypto/internal/fips140test/acvp_capabilities_fips140v1.26.json
diff --git a/src/crypto/internal/fips140test/acvp_fips140v2.0_test.go b/src/crypto/internal/fips140test/acvp_fips140v1.26_test.go
index e9ef91537a..10a44f1492 100644
--- a/src/crypto/internal/fips140test/acvp_fips140v2.0_test.go
+++ b/src/crypto/internal/fips140test/acvp_fips140v1.26_test.go
@@ -12,10 +12,10 @@ import (
"fmt"
)
-//go:embed acvp_capabilities_fips140v2.0.json
+//go:embed acvp_capabilities_fips140v1.26.json
var capabilitiesJson []byte
-var testConfigFile = "acvp_test_fips140v2.0.config.json"
+var testConfigFile = "acvp_test_fips140v1.26.config.json"
func init() {
commands["ML-DSA-44/keyGen"] = cmdMlDsaKeyGenAft(mldsa.NewPrivateKey44)
diff --git a/src/crypto/internal/fips140test/acvp_test_fips140v2.0.config.json b/src/crypto/internal/fips140test/acvp_test_fips140v1.26.config.json
index 51c76d9288..51c76d9288 100644
--- a/src/crypto/internal/fips140test/acvp_test_fips140v2.0.config.json
+++ b/src/crypto/internal/fips140test/acvp_test_fips140v1.26.config.json
diff --git a/src/crypto/internal/fips140test/cast_fips140v1.0_test.go b/src/crypto/internal/fips140test/cast_fips140v1.0_test.go
index 4780966208..b9ddfe4d8b 100644
--- a/src/crypto/internal/fips140test/cast_fips140v1.0_test.go
+++ b/src/crypto/internal/fips140test/cast_fips140v1.0_test.go
@@ -6,4 +6,4 @@
package fipstest
-func fips140v2Conditionals() {}
+func fips140v126Conditionals() {}
diff --git a/src/crypto/internal/fips140test/cast_fips140v2.0_test.go b/src/crypto/internal/fips140test/cast_fips140v1.26_test.go
index 06e0513a7f..ef79068c38 100644
--- a/src/crypto/internal/fips140test/cast_fips140v2.0_test.go
+++ b/src/crypto/internal/fips140test/cast_fips140v1.26_test.go
@@ -8,7 +8,7 @@ package fipstest
import "crypto/internal/fips140/mldsa"
-func fips140v2Conditionals() {
+func fips140v126Conditionals() {
// ML-DSA sign and verify PCT
kMLDSA := mldsa.GenerateKey44()
// ML-DSA-44
diff --git a/src/crypto/internal/fips140test/cast_test.go b/src/crypto/internal/fips140test/cast_test.go
index 5a80006622..817dcb9a35 100644
--- a/src/crypto/internal/fips140test/cast_test.go
+++ b/src/crypto/internal/fips140test/cast_test.go
@@ -115,7 +115,7 @@ func TestAllCASTs(t *testing.T) {
// TestConditionals causes the conditional CASTs and PCTs to be invoked.
func TestConditionals(t *testing.T) {
- fips140v2Conditionals()
+ fips140v126Conditionals()
// ML-KEM PCT
kMLKEM, err := mlkem.GenerateKey768()
if err != nil {
diff --git a/src/crypto/internal/rand/rand_fipsv1.0.go b/src/crypto/internal/rand/rand_fips140v1.0.go
index 29eba7e0bc..29eba7e0bc 100644
--- a/src/crypto/internal/rand/rand_fipsv1.0.go
+++ b/src/crypto/internal/rand/rand_fips140v1.0.go
diff --git a/src/crypto/internal/rand/rand_fipsv2.0.go b/src/crypto/internal/rand/rand_fips140v1.26.go
index 0dc18e7883..0dc18e7883 100644
--- a/src/crypto/internal/rand/rand_fipsv2.0.go
+++ b/src/crypto/internal/rand/rand_fips140v1.26.go
diff --git a/src/crypto/tls/conn.go b/src/crypto/tls/conn.go
index c04c7a506e..a840125a45 100644
--- a/src/crypto/tls/conn.go
+++ b/src/crypto/tls/conn.go
@@ -224,6 +224,9 @@ func (hc *halfConn) changeCipherSpec() error {
return nil
}
+// setTrafficSecret sets the traffic secret for the given encryption level. setTrafficSecret
+// should not be called directly, but rather through the Conn setWriteTrafficSecret and
+// setReadTrafficSecret wrapper methods.
func (hc *halfConn) setTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) {
hc.trafficSecret = secret
hc.level = level
@@ -1339,9 +1342,6 @@ func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error {
return c.in.setErrorLocked(c.sendAlert(alertInternalError))
}
- newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret)
- c.in.setTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret)
-
if keyUpdate.updateRequested {
c.out.Lock()
defer c.out.Unlock()
@@ -1359,7 +1359,12 @@ func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error {
}
newSecret := cipherSuite.nextTrafficSecret(c.out.trafficSecret)
- c.out.setTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret)
+ c.setWriteTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret)
+ }
+
+ newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret)
+ if err := c.setReadTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret); err != nil {
+ return err
}
return nil
@@ -1576,7 +1581,9 @@ func (c *Conn) handshakeContext(ctx context.Context) (ret error) {
// Provide the 1-RTT read secret now that the handshake is complete.
// The QUIC layer MUST NOT decrypt 1-RTT packets prior to completing
// the handshake (RFC 9001, Section 5.7).
- c.quicSetReadSecret(QUICEncryptionLevelApplication, c.cipherSuite, c.in.trafficSecret)
+ if err := c.quicSetReadSecret(QUICEncryptionLevelApplication, c.cipherSuite, c.in.trafficSecret); err != nil {
+ return err
+ }
} else {
c.out.Lock()
a, ok := errors.AsType[alert](c.out.err)
@@ -1672,3 +1679,25 @@ func (c *Conn) VerifyHostname(host string) error {
}
return c.peerCertificates[0].VerifyHostname(host)
}
+
+// setReadTrafficSecret sets the read traffic secret for the given encryption level. If
+// being called at the same time as setWriteTrafficSecret, the caller must ensure the call
+// to setWriteTrafficSecret happens first so any alerts are sent at the write level.
+func (c *Conn) setReadTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) error {
+ // Ensure that there are no buffered handshake messages before changing the
+ // read keys, since that can cause messages to be parsed that were encrypted
+ // using old keys which are no longer appropriate.
+ if c.hand.Len() != 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: handshake buffer not empty before setting read traffic secret")
+ }
+ c.in.setTrafficSecret(suite, level, secret)
+ return nil
+}
+
+// setWriteTrafficSecret sets the write traffic secret for the given encryption level. If
+// being called at the same time as setReadTrafficSecret, the caller must ensure the call
+// to setWriteTrafficSecret happens first so any alerts are sent at the write level.
+func (c *Conn) setWriteTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) {
+ c.out.setTrafficSecret(suite, level, secret)
+}
diff --git a/src/crypto/tls/handshake_client_tls13.go b/src/crypto/tls/handshake_client_tls13.go
index e696bd3a13..77a24b4a78 100644
--- a/src/crypto/tls/handshake_client_tls13.go
+++ b/src/crypto/tls/handshake_client_tls13.go
@@ -490,16 +490,17 @@ func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error {
handshakeSecret := earlySecret.HandshakeSecret(sharedKey)
clientSecret := handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript)
- c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret)
+ c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret)
serverSecret := handshakeSecret.ServerHandshakeTrafficSecret(hs.transcript)
- c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret)
+ if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret); err != nil {
+ return err
+ }
if c.quic != nil {
- if c.hand.Len() != 0 {
- c.sendAlert(alertUnexpectedMessage)
- }
c.quicSetWriteSecret(QUICEncryptionLevelHandshake, hs.suite.id, clientSecret)
- c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret)
+ if err := c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret); err != nil {
+ return err
+ }
}
err = c.config.writeKeyLog(keyLogLabelClientHandshake, hs.hello.random, clientSecret)
@@ -710,7 +711,9 @@ func (hs *clientHandshakeStateTLS13) readServerFinished() error {
hs.trafficSecret = hs.masterSecret.ClientApplicationTrafficSecret(hs.transcript)
serverSecret := hs.masterSecret.ServerApplicationTrafficSecret(hs.transcript)
- c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret)
+ if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret); err != nil {
+ return err
+ }
err = c.config.writeKeyLog(keyLogLabelClientTraffic, hs.hello.random, hs.trafficSecret)
if err != nil {
@@ -813,16 +816,13 @@ func (hs *clientHandshakeStateTLS13) sendClientFinished() error {
return err
}
- c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret)
+ c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret)
if !c.config.SessionTicketsDisabled && c.config.ClientSessionCache != nil {
c.resumptionSecret = hs.masterSecret.ResumptionMasterSecret(hs.transcript)
}
if c.quic != nil {
- if c.hand.Len() != 0 {
- c.sendAlert(alertUnexpectedMessage)
- }
c.quicSetWriteSecret(QUICEncryptionLevelApplication, hs.suite.id, hs.trafficSecret)
}
diff --git a/src/crypto/tls/handshake_server_tls13.go b/src/crypto/tls/handshake_server_tls13.go
index 3bed1359a3..b066924e29 100644
--- a/src/crypto/tls/handshake_server_tls13.go
+++ b/src/crypto/tls/handshake_server_tls13.go
@@ -410,7 +410,9 @@ func (hs *serverHandshakeStateTLS13) checkForResumption() error {
return err
}
earlyTrafficSecret := hs.earlySecret.ClientEarlyTrafficSecret(transcript)
- c.quicSetReadSecret(QUICEncryptionLevelEarly, hs.suite.id, earlyTrafficSecret)
+ if err := c.quicSetReadSecret(QUICEncryptionLevelEarly, hs.suite.id, earlyTrafficSecret); err != nil {
+ return err
+ }
}
c.didResume = true
@@ -514,6 +516,14 @@ func (hs *serverHandshakeStateTLS13) sendDummyChangeCipherSpec() error {
func (hs *serverHandshakeStateTLS13) doHelloRetryRequest(selectedGroup CurveID) (*keyShare, error) {
c := hs.c
+ // Make sure the client didn't send extra handshake messages alongside
+ // their initial client_hello. If they sent two client_hello messages,
+ // we will consume the second before they respond to the server_hello.
+ if c.hand.Len() != 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return nil, errors.New("tls: handshake buffer not empty before HelloRetryRequest")
+ }
+
// The first ClientHello gets double-hashed into the transcript upon a
// HelloRetryRequest. See RFC 8446, Section 4.4.1.
if err := transcriptMsg(hs.clientHello, hs.transcript); err != nil {
@@ -733,17 +743,18 @@ func (hs *serverHandshakeStateTLS13) sendServerParameters() error {
}
hs.handshakeSecret = earlySecret.HandshakeSecret(hs.sharedKey)
- clientSecret := hs.handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript)
- c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret)
serverSecret := hs.handshakeSecret.ServerHandshakeTrafficSecret(hs.transcript)
- c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret)
+ c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret)
+ clientSecret := hs.handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript)
+ if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret); err != nil {
+ return err
+ }
if c.quic != nil {
- if c.hand.Len() != 0 {
- c.sendAlert(alertUnexpectedMessage)
- }
c.quicSetWriteSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret)
- c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, clientSecret)
+ if err := c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, clientSecret); err != nil {
+ return err
+ }
}
err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.clientHello.random, clientSecret)
@@ -887,13 +898,9 @@ func (hs *serverHandshakeStateTLS13) sendServerFinished() error {
hs.trafficSecret = hs.masterSecret.ClientApplicationTrafficSecret(hs.transcript)
serverSecret := hs.masterSecret.ServerApplicationTrafficSecret(hs.transcript)
- c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret)
+ c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret)
if c.quic != nil {
- if c.hand.Len() != 0 {
- // TODO: Handle this in setTrafficSecret?
- c.sendAlert(alertUnexpectedMessage)
- }
c.quicSetWriteSecret(QUICEncryptionLevelApplication, hs.suite.id, serverSecret)
}
@@ -1123,7 +1130,9 @@ func (hs *serverHandshakeStateTLS13) readClientFinished() error {
return errors.New("tls: invalid client finished hash")
}
- c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret)
+ if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret); err != nil {
+ return err
+ }
return nil
}
diff --git a/src/crypto/tls/handshake_test.go b/src/crypto/tls/handshake_test.go
index 6e15459a9a..9cea8182d0 100644
--- a/src/crypto/tls/handshake_test.go
+++ b/src/crypto/tls/handshake_test.go
@@ -7,6 +7,7 @@ package tls
import (
"bufio"
"bytes"
+ "context"
"crypto/ed25519"
"crypto/x509"
"encoding/hex"
@@ -638,3 +639,142 @@ var clientEd25519KeyPEM = testingKey(`
-----BEGIN TESTING KEY-----
MC4CAQAwBQYDK2VwBCIEINifzf07d9qx3d44e0FSbV4mC/xQxT644RRbpgNpin7I
-----END TESTING KEY-----`)
+
+func TestServerHelloTrailingMessage(t *testing.T) {
+ // In TLS 1.3 the change cipher spec message is optional. If a CCS message
+ // is not sent, after reading the ServerHello, the read traffic secret is
+ // set, and all following messages must be encrypted. If the server sends
+ // additional unencrypted messages in a record with the ServerHello, the
+ // client must either fail or ignore the additional messages.
+
+ c, s := localPipe(t)
+ go func() {
+ ctx := context.Background()
+ srv := Server(s, testConfig)
+ clientHello, _, err := srv.readClientHello(ctx)
+ if err != nil {
+ testFatal(t, err)
+ }
+
+ hs := serverHandshakeStateTLS13{
+ c: srv,
+ ctx: ctx,
+ clientHello: clientHello,
+ }
+ if err := hs.processClientHello(); err != nil {
+ testFatal(t, err)
+ }
+ if err := transcriptMsg(hs.clientHello, hs.transcript); err != nil {
+ testFatal(t, err)
+ }
+
+ record, err := concatHandshakeMessages(hs.hello, &encryptedExtensionsMsg{alpnProtocol: "h2"})
+ if err != nil {
+ testFatal(t, err)
+ }
+
+ if _, err := s.Write(record); err != nil {
+ testFatal(t, err)
+ }
+ srv.Close()
+ }()
+
+ cli := Client(c, testConfig)
+ expectedErr := "tls: handshake buffer not empty before setting read traffic secret"
+ if err := cli.Handshake(); err == nil {
+ t.Fatal("expected error from incomplete handshake, got nil")
+ } else if err.Error() != expectedErr {
+ t.Fatalf("expected error %q, got %q", expectedErr, err.Error())
+ }
+}
+
+func TestClientHelloTrailingMessage(t *testing.T) {
+ // Same as TestServerHelloTrailingMessage but for the client side.
+
+ c, s := localPipe(t)
+ go func() {
+ cli := Client(c, testConfig)
+
+ hello, _, _, err := cli.makeClientHello()
+ if err != nil {
+ testFatal(t, err)
+ }
+
+ record, err := concatHandshakeMessages(hello, &certificateMsgTLS13{})
+ if err != nil {
+ testFatal(t, err)
+ }
+
+ if _, err := c.Write(record); err != nil {
+ testFatal(t, err)
+ }
+ cli.Close()
+ }()
+
+ srv := Server(s, testConfig)
+ expectedErr := "tls: handshake buffer not empty before setting read traffic secret"
+ if err := srv.Handshake(); err == nil {
+ t.Fatal("expected error from incomplete handshake, got nil")
+ } else if err.Error() != expectedErr {
+ t.Fatalf("expected error %q, got %q", expectedErr, err.Error())
+ }
+}
+
+func TestDoubleClientHelloHRR(t *testing.T) {
+ // If a client sends two ClientHello messages in a single record, and the
+ // server sends a HRR after reading the first ClientHello, the server must
+ // either fail or ignore the trailing ClientHello.
+
+ c, s := localPipe(t)
+
+ go func() {
+ cli := Client(c, testConfig)
+
+ hello, _, _, err := cli.makeClientHello()
+ if err != nil {
+ testFatal(t, err)
+ }
+ hello.keyShares = nil
+
+ record, err := concatHandshakeMessages(hello, hello)
+ if err != nil {
+ testFatal(t, err)
+ }
+
+ if _, err := c.Write(record); err != nil {
+ testFatal(t, err)
+ }
+ cli.Close()
+ }()
+
+ srv := Server(s, testConfig)
+ expectedErr := "tls: handshake buffer not empty before HelloRetryRequest"
+ if err := srv.Handshake(); err == nil {
+ t.Fatal("expected error from incomplete handshake, got nil")
+ } else if err.Error() != expectedErr {
+ t.Fatalf("expected error %q, got %q", expectedErr, err.Error())
+ }
+}
+
+// concatHandshakeMessages marshals and concatenates the given handshake
+// messages into a single record.
+func concatHandshakeMessages(msgs ...handshakeMessage) ([]byte, error) {
+ var marshalled []byte
+ for _, msg := range msgs {
+ data, err := msg.marshal()
+ if err != nil {
+ return nil, err
+ }
+ marshalled = append(marshalled, data...)
+ }
+ m := len(marshalled)
+ outBuf := make([]byte, recordHeaderLen)
+ outBuf[0] = byte(recordTypeHandshake)
+ vers := VersionTLS12
+ outBuf[1] = byte(vers >> 8)
+ outBuf[2] = byte(vers)
+ outBuf[3] = byte(m >> 8)
+ outBuf[4] = byte(m)
+ outBuf = append(outBuf, marshalled...)
+ return outBuf, nil
+}
diff --git a/src/crypto/tls/quic.go b/src/crypto/tls/quic.go
index b3f95dbb18..76b7eb2cbd 100644
--- a/src/crypto/tls/quic.go
+++ b/src/crypto/tls/quic.go
@@ -402,13 +402,22 @@ func (c *Conn) quicReadHandshakeBytes(n int) error {
return nil
}
-func (c *Conn) quicSetReadSecret(level QUICEncryptionLevel, suite uint16, secret []byte) {
+func (c *Conn) quicSetReadSecret(level QUICEncryptionLevel, suite uint16, secret []byte) error {
+ // Ensure that there are no buffered handshake messages before changing the
+ // read keys, since that can cause messages to be parsed that were encrypted
+ // using old keys which are no longer appropriate.
+ // TODO(roland): we should merge this check with the similar one in setReadTrafficSecret.
+ if c.hand.Len() != 0 {
+ c.sendAlert(alertUnexpectedMessage)
+ return errors.New("tls: handshake buffer not empty before setting read traffic secret")
+ }
c.quic.events = append(c.quic.events, QUICEvent{
Kind: QUICSetReadSecret,
Level: level,
Suite: suite,
Data: secret,
})
+ return nil
}
func (c *Conn) quicSetWriteSecret(level QUICEncryptionLevel, suite uint16, secret []byte) {
diff --git a/src/debug/pe/file.go b/src/debug/pe/file.go
index ed63a11cb6..91b7d1dca1 100644
--- a/src/debug/pe/file.go
+++ b/src/debug/pe/file.go
@@ -379,7 +379,11 @@ func (f *File) ImportedSymbols() ([]string, error) {
}
// seek to the virtual address specified in the import data directory
- d = d[idd.VirtualAddress-ds.VirtualAddress:]
+ seek := idd.VirtualAddress - ds.VirtualAddress
+ if seek >= uint32(len(d)) {
+ return nil, errors.New("optional header data directory virtual size doesn't fit within data seek")
+ }
+ d = d[seek:]
// start decoding the import directory
var ida []ImportDirectory
@@ -408,9 +412,16 @@ func (f *File) ImportedSymbols() ([]string, error) {
dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress))
d, _ = ds.Data()
// seek to OriginalFirstThunk
- d = d[dt.OriginalFirstThunk-ds.VirtualAddress:]
+ seek := dt.OriginalFirstThunk - ds.VirtualAddress
+ if seek >= uint32(len(d)) {
+ return nil, errors.New("import directory original first thunk doesn't fit within data seek")
+ }
+ d = d[seek:]
for len(d) > 0 {
if pe64 { // 64bit
+ if len(d) < 8 {
+ return nil, errors.New("thunk parsing needs at least 8-bytes")
+ }
va := binary.LittleEndian.Uint64(d[0:8])
d = d[8:]
if va == 0 {
@@ -423,6 +434,9 @@ func (f *File) ImportedSymbols() ([]string, error) {
all = append(all, fn+":"+dt.dll)
}
} else { // 32bit
+ if len(d) <= 4 {
+ return nil, errors.New("thunk parsing needs at least 5-bytes")
+ }
va := binary.LittleEndian.Uint32(d[0:4])
d = d[4:]
if va == 0 {
diff --git a/src/encoding/gob/doc.go b/src/encoding/gob/doc.go
index c746806887..390f25088e 100644
--- a/src/encoding/gob/doc.go
+++ b/src/encoding/gob/doc.go
@@ -153,16 +153,16 @@ are transmitted, even if all the elements are zero.
Structs are sent as a sequence of (field number, field value) pairs. The field
value is sent using the standard gob encoding for its type, recursively. If a
-field has the zero value for its type (except for arrays; see above), it is omitted
-from the transmission. The field number is defined by the type of the encoded
-struct: the first field of the encoded type is field 0, the second is field 1,
-etc. When encoding a value, the field numbers are delta encoded for efficiency
-and the fields are always sent in order of increasing field number; the deltas are
-therefore unsigned. The initialization for the delta encoding sets the field
-number to -1, so an unsigned integer field 0 with value 7 is transmitted as unsigned
-delta = 1, unsigned value = 7 or (01 07). Finally, after all the fields have been
-sent a terminating mark denotes the end of the struct. That mark is a delta=0
-value, which has representation (00).
+field has the zero value for its type (except for arrays; see above) or it's a
+pointer to a zero value, it is omitted from the transmission. The field number
+is defined by the type of the encoded struct: the first field of the encoded type
+is field 0, the second is field 1, etc. When encoding a value, the field numbers
+are delta encoded for efficiency and the fields are always sent in order of
+increasing field number; the deltas are therefore unsigned. The initialization
+for the delta encoding sets the field number to -1, so an unsigned integer field 0
+with value 7 is transmitted as unsigned delta = 1, unsigned value = 7 or (01 07).
+Finally, after all the fields have been sent a terminating mark denotes the end
+of the struct. That mark is a delta=0 value, which has representation (00).
Interface types are not checked for compatibility; all interface types are
treated, for transmission, as members of a single "interface" type, analogous to
diff --git a/src/errors/join.go b/src/errors/join.go
index 08a79867c6..730bf7043c 100644
--- a/src/errors/join.go
+++ b/src/errors/join.go
@@ -27,16 +27,6 @@ func Join(errs ...error) error {
if n == 0 {
return nil
}
- if n == 1 {
- for _, err := range errs {
- if _, ok := err.(interface {
- Unwrap() []error
- }); ok {
- return err
- }
- }
- }
-
e := &joinError{
errs: make([]error, 0, n),
}
diff --git a/src/errors/join_test.go b/src/errors/join_test.go
index 439b372ca0..8ee4d7f77b 100644
--- a/src/errors/join_test.go
+++ b/src/errors/join_test.go
@@ -25,6 +25,7 @@ func TestJoinReturnsNil(t *testing.T) {
func TestJoin(t *testing.T) {
err1 := errors.New("err1")
err2 := errors.New("err2")
+ merr := multiErr{errors.New("err3")}
for _, test := range []struct {
errs []error
want []error
@@ -37,6 +38,9 @@ func TestJoin(t *testing.T) {
}, {
errs: []error{err1, nil, err2},
want: []error{err1, err2},
+ }, {
+ errs: []error{merr},
+ want: []error{merr},
}} {
got := errors.Join(test.errs...).(interface{ Unwrap() []error }).Unwrap()
if !reflect.DeepEqual(got, test.want) {
@@ -70,37 +74,3 @@ func TestJoinErrorMethod(t *testing.T) {
}
}
}
-
-func BenchmarkJoin(b *testing.B) {
- for _, bb := range []struct {
- name string
- errs []error
- }{
- {
- name: "no error",
- },
- {
- name: "single non-nil error",
- errs: []error{errors.New("err")},
- },
- {
- name: "multiple errors",
- errs: []error{errors.New("err"), errors.New("newerr"), errors.New("newerr2")},
- },
- {
- name: "unwrappable single error",
- errs: []error{errors.Join(errors.New("err"))},
- },
- {
- name: "nil first error",
- errs: []error{nil, errors.New("newerr")},
- },
- } {
- b.Run(bb.name, func(b *testing.B) {
- b.ReportAllocs()
- for i := 0; i < b.N; i++ {
- _ = errors.Join(bb.errs...)
- }
- })
- }
-}
diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go
index 0e7de3eb78..2569e73c7c 100644
--- a/src/go/doc/comment_test.go
+++ b/src/go/doc/comment_test.go
@@ -24,12 +24,12 @@ func TestComment(t *testing.T) {
pkg := New(pkgs["pkgdoc"], "testdata/pkgdoc", 0)
var (
- input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n"
- wantHTML = `<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods. <a href="#I.F">I.F</a> is an interface method and [I.V] is a broken link.` + "\n"
- wantOldHTML = "<p>[T] and [U] are <i>types</i>, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n"
- wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods. [I.F](#I.F) is an interface method and \\[I.V] is a broken link.\n"
- wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.M1 and G.M2 are generic methods. I.F is an interface\nmethod and [I.V] is a broken link.\n"
- wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n[I.F] is an interface method and [I.V] is a broken link.\n"
+ input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n"
+ wantHTML = `<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.X">G.X</a> is a field, <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods. <a href="#I.F">I.F</a> is an interface method and [I.V] is a broken link.` + "\n"
+ wantOldHTML = "<p>[T] and [U] are <i>types</i>, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n"
+ wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.X](#G.X) is a field, [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods. [I.F](#I.F) is an interface method and \\[I.V] is a broken link.\n"
+ wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.X is a field, G.M1 and G.M2 are generic methods.\nI.F is an interface method and [I.V] is a broken link.\n"
+ wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2]\nare generic methods. [I.F] is an interface method and [I.V] is a broken link.\n"
wantSynopsis = "T and U are types, and T.M is a method, but [V] is a broken link."
wantOldSynopsis = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link."
)
diff --git a/src/go/doc/doc.go b/src/go/doc/doc.go
index 0c23f1a46c..8c786896fd 100644
--- a/src/go/doc/doc.go
+++ b/src/go/doc/doc.go
@@ -168,6 +168,7 @@ func (p *Package) collectTypes(types []*Type) {
p.collectFuncs(t.Funcs)
p.collectFuncs(t.Methods)
p.collectInterfaceMethods(t)
+ p.collectStructFields(t)
}
}
@@ -212,6 +213,24 @@ func (p *Package) collectInterfaceMethods(t *Type) {
}
}
+func (p *Package) collectStructFields(t *Type) {
+ for _, s := range t.Decl.Specs {
+ spec, ok := s.(*ast.TypeSpec)
+ if !ok {
+ continue
+ }
+ list, isStruct := fields(spec.Type)
+ if !isStruct {
+ continue
+ }
+ for _, field := range list {
+ for _, name := range field.Names {
+ p.syms[t.Name+"."+name.Name] = true
+ }
+ }
+ }
+}
+
// NewFromFiles computes documentation for a package.
//
// The package is specified by a list of *ast.Files and corresponding
diff --git a/src/go/doc/example.go b/src/go/doc/example.go
index ba1f863df0..8c01bf0a8d 100644
--- a/src/go/doc/example.go
+++ b/src/go/doc/example.go
@@ -74,6 +74,9 @@ func Examples(testFiles ...*ast.File) []*Example {
if params := f.Type.Params; len(params.List) != 0 {
continue // function has params; not a valid example
}
+ if results := f.Type.Results; results != nil && len(results.List) != 0 {
+ continue // function has results; not a valid example
+ }
if f.Body == nil { // ast.File.Body nil dereference (see issue 28044)
continue
}
diff --git a/src/go/doc/example_test.go b/src/go/doc/example_test.go
index 2fd54f8abb..db2b2d34cd 100644
--- a/src/go/doc/example_test.go
+++ b/src/go/doc/example_test.go
@@ -228,6 +228,8 @@ func ExampleFunc1_foo() {}
func ExampleFunc1_foo_suffix() {}
func ExampleFunc1_foo_Suffix() {} // matches Func1, instead of Func1_foo
func Examplefunc1() {} // invalid - cannot match unexported
+func ExampleFunc1_params(a int) {} // invalid - has parameter
+func ExampleFunc1_results() int {} // invalid - has results
func ExampleType1_Func1() {}
func ExampleType1_Func1_() {} // invalid - suffix must start with a lower-case letter
diff --git a/src/go/doc/testdata/pkgdoc/doc.go b/src/go/doc/testdata/pkgdoc/doc.go
index d542dc2cdd..24e127c7fb 100644
--- a/src/go/doc/testdata/pkgdoc/doc.go
+++ b/src/go/doc/testdata/pkgdoc/doc.go
@@ -18,7 +18,7 @@ func (T) M() {}
var _ = rand.Int
var _ = crand.Reader
-type G[T any] struct{ x T }
+type G[T any] struct{ X T }
func (g G[T]) M1() {}
func (g *G[T]) M2() {}
diff --git a/src/internal/coverage/decodemeta/decodefile.go b/src/internal/coverage/decodemeta/decodefile.go
index 6f4dd1a3ec..474844bf97 100644
--- a/src/internal/coverage/decodemeta/decodefile.go
+++ b/src/internal/coverage/decodemeta/decodefile.go
@@ -75,7 +75,7 @@ func (r *CoverageMetaFileReader) readFileHeader() error {
// Vet the version. If this is a meta-data file from the future,
// we won't be able to read it.
if r.hdr.Version > coverage.MetaFileVersion {
- return fmt.Errorf("meta-data file withn unknown version %d (expected %d)", r.hdr.Version, coverage.MetaFileVersion)
+ return fmt.Errorf("meta-data file with an unknown version %d (expected %d)", r.hdr.Version, coverage.MetaFileVersion)
}
// Read package offsets for good measure
diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go
index 4610ce807e..711fb045c3 100644
--- a/src/internal/cpu/cpu_x86.go
+++ b/src/internal/cpu/cpu_x86.go
@@ -219,7 +219,7 @@ func doinit() {
if eax7 >= 1 {
eax71, _, _, _ := cpuid(7, 1)
if X86.HasAVX {
- X86.HasAVXVNNI = isSet(4, eax71)
+ X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI)
}
}
diff --git a/src/net/rpc/server.go b/src/net/rpc/server.go
index 4233a426fe..961145c6f2 100644
--- a/src/net/rpc/server.go
+++ b/src/net/rpc/server.go
@@ -202,7 +202,7 @@ func NewServer() *Server {
// DefaultServer is the default instance of [*Server].
var DefaultServer = NewServer()
-// Is this type exported or a builtin?
+// isExportedOrBuiltinType reports whether t is an exported or builtin type.
func isExportedOrBuiltinType(t reflect.Type) bool {
for t.Kind() == reflect.Pointer {
t = t.Elem()
diff --git a/src/os/exec/exec.go b/src/os/exec/exec.go
index e84ebfc453..aa7a6be7f0 100644
--- a/src/os/exec/exec.go
+++ b/src/os/exec/exec.go
@@ -102,6 +102,7 @@ import (
"runtime"
"strconv"
"strings"
+ "sync/atomic"
"syscall"
"time"
)
@@ -354,6 +355,11 @@ type Cmd struct {
// the work of resolving the extension, so Start doesn't need to do it again.
// This is only used on Windows.
cachedLookExtensions struct{ in, out string }
+
+ // startCalled records that Start was attempted, regardless of outcome.
+ // (Until go.dev/issue/77075 is resolved, we use atomic.SwapInt32,
+ // not atomic.Bool.Swap, to avoid triggering the copylocks vet check.)
+ startCalled int32
}
// A ctxResult reports the result of watching the Context associated with a
@@ -635,7 +641,8 @@ func (c *Cmd) Run() error {
func (c *Cmd) Start() error {
// Check for doubled Start calls before we defer failure cleanup. If the prior
// call to Start succeeded, we don't want to spuriously close its pipes.
- if c.Process != nil {
+ // It is an error to call Start twice even if the first call did not create a process.
+ if atomic.SwapInt32(&c.startCalled, 1) != 0 {
return errors.New("exec: already started")
}
@@ -647,6 +654,7 @@ func (c *Cmd) Start() error {
if !started {
closeDescriptors(c.parentIOPipes)
c.parentIOPipes = nil
+ c.goroutine = nil // aid GC, finalization of pipe fds
}
}()
diff --git a/src/os/exec/exec_test.go b/src/os/exec/exec_test.go
index 1decebdc22..bf2f3da535 100644
--- a/src/os/exec/exec_test.go
+++ b/src/os/exec/exec_test.go
@@ -1839,3 +1839,29 @@ func TestAbsPathExec(t *testing.T) {
}
})
}
+
+// Calling Start twice is an error, regardless of outcome.
+func TestStart_twice(t *testing.T) {
+ testenv.MustHaveExec(t)
+
+ cmd := exec.Command("/bin/nonesuch")
+ for i, want := range []string{
+ cond(runtime.GOOS == "windows",
+ `exec: "/bin/nonesuch": executable file not found in %PATH%`,
+ "fork/exec /bin/nonesuch: no such file or directory"),
+ "exec: already started",
+ } {
+ err := cmd.Start()
+ if got := fmt.Sprint(err); got != want {
+ t.Errorf("Start call #%d return err %q, want %q", i+1, got, want)
+ }
+ }
+}
+
+func cond[T any](cond bool, t, f T) T {
+ if cond {
+ return t
+ } else {
+ return f
+ }
+}
diff --git a/src/reflect/value.go b/src/reflect/value.go
index 7f0ec2a397..8c8acbaa9a 100644
--- a/src/reflect/value.go
+++ b/src/reflect/value.go
@@ -362,6 +362,7 @@ func (v Value) CanSet() bool {
// type of the function's corresponding input parameter.
// If v is a variadic function, Call creates the variadic slice parameter
// itself, copying in the corresponding values.
+// It panics if the Value was obtained by accessing unexported struct fields.
func (v Value) Call(in []Value) []Value {
v.mustBe(Func)
v.mustBeExported()
@@ -375,6 +376,7 @@ func (v Value) Call(in []Value) []Value {
// It returns the output results as Values.
// As in Go, each input argument must be assignable to the
// type of the function's corresponding input parameter.
+// It panics if the Value was obtained by accessing unexported struct fields.
func (v Value) CallSlice(in []Value) []Value {
v.mustBe(Func)
v.mustBeExported()
diff --git a/src/regexp/find_test.go b/src/regexp/find_test.go
index 49e9619cef..5b446c29cb 100644
--- a/src/regexp/find_test.go
+++ b/src/regexp/find_test.go
@@ -159,23 +159,23 @@ func TestFind(t *testing.T) {
for _, test := range findTests {
re := MustCompile(test.pat)
if re.String() != test.pat {
- t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
+ t.Errorf("re.String() = %q, want %q", re.String(), test.pat)
}
result := re.Find([]byte(test.text))
switch {
case len(test.matches) == 0 && len(result) == 0:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
- expect := test.text[test.matches[0][0]:test.matches[0][1]]
+ want := test.text[test.matches[0][0]:test.matches[0][1]]
if len(result) != cap(result) {
- t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
+ t.Errorf("got capacity %d, want %d: %s", cap(result), len(result), test)
}
- if expect != string(result) {
- t.Errorf("expected %q got %q: %s", expect, result, test)
+ if want != string(result) {
+ t.Errorf("got %q, want %q: %s", result, want, test)
}
}
}
@@ -188,16 +188,16 @@ func TestFindString(t *testing.T) {
case len(test.matches) == 0 && len(result) == 0:
// ok
case test.matches == nil && result != "":
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == "":
// Tricky because an empty result has two meanings: no match or empty match.
if test.matches[0][0] != test.matches[0][1] {
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
}
case test.matches != nil && result != "":
- expect := test.text[test.matches[0][0]:test.matches[0][1]]
- if expect != result {
- t.Errorf("expected %q got %q: %s", expect, result, test)
+ want := test.text[test.matches[0][0]:test.matches[0][1]]
+ if want != result {
+ t.Errorf("got %q, want %q: %s", result, want, test)
}
}
}
@@ -208,13 +208,13 @@ func testFindIndex(test *FindTest, result []int, t *testing.T) {
case len(test.matches) == 0 && len(result) == 0:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %v, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
- expect := test.matches[0]
- if expect[0] != result[0] || expect[1] != result[1] {
- t.Errorf("expected %v got %v: %s", expect, result, test)
+ want := test.matches[0]
+ if want[0] != result[0] || want[1] != result[1] {
+ t.Errorf("got %v, want %v: %s", result, want, test)
}
}
}
@@ -246,22 +246,22 @@ func TestFindAll(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Fatalf("expected match; got none: %s", test)
+ t.Fatalf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
continue
}
for k, e := range test.matches {
got := result[k]
if len(got) != cap(got) {
- t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
+ t.Errorf("match %d: got capacity %d, want %d: %s", k, cap(got), len(got), test)
}
- expect := test.text[e[0]:e[1]]
- if expect != string(got) {
- t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
+ want := test.text[e[0]:e[1]]
+ if want != string(got) {
+ t.Errorf("match %d: got %q, want %q: %s", k, got, want, test)
}
}
}
@@ -275,18 +275,18 @@ func TestFindAllString(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
continue
}
for k, e := range test.matches {
- expect := test.text[e[0]:e[1]]
- if expect != result[k] {
- t.Errorf("expected %q got %q: %s", expect, result, test)
+ want := test.text[e[0]:e[1]]
+ if want != result[k] {
+ t.Errorf("got %q, want %q: %s", result[k], want, test)
}
}
}
@@ -298,17 +298,17 @@ func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %v, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
if len(test.matches) != len(result) {
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
return
}
for k, e := range test.matches {
if e[0] != result[k][0] || e[1] != result[k][1] {
- t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
+ t.Errorf("match %d: got %v, want %v: %s", k, result[k], e, test)
}
}
}
@@ -330,24 +330,24 @@ func TestFindAllStringIndex(t *testing.T) {
func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
if len(submatches) != len(result)*2 {
- t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
+ t.Errorf("match %d: got %d submatches, want %d: %s", n, len(result), len(submatches)/2, test)
return
}
for k := 0; k < len(submatches); k += 2 {
if submatches[k] == -1 {
if result[k/2] != nil {
- t.Errorf("match %d: expected nil got %q: %s", n, result, test)
+ t.Errorf("match %d: got %q, want nil: %s", n, result, test)
}
continue
}
got := result[k/2]
if len(got) != cap(got) {
- t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
+ t.Errorf("match %d: got capacity %d, want %d: %s", n, cap(got), len(got), test)
return
}
- expect := test.text[submatches[k]:submatches[k+1]]
- if expect != string(got) {
- t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
+ want := test.text[submatches[k]:submatches[k+1]]
+ if want != string(got) {
+ t.Errorf("match %d: got %q, want %q: %s", n, got, want, test)
return
}
}
@@ -360,9 +360,9 @@ func TestFindSubmatch(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
testSubmatchBytes(&test, 0, test.matches[0], result, t)
}
@@ -371,19 +371,19 @@ func TestFindSubmatch(t *testing.T) {
func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
if len(submatches) != len(result)*2 {
- t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
+ t.Errorf("match %d: got %d submatches, want %d: %s", n, len(result), len(submatches)/2, test)
return
}
for k := 0; k < len(submatches); k += 2 {
if submatches[k] == -1 {
if result[k/2] != "" {
- t.Errorf("match %d: expected nil got %q: %s", n, result, test)
+ t.Errorf("match %d: got %q, want empty string: %s", n, result, test)
}
continue
}
- expect := test.text[submatches[k]:submatches[k+1]]
- if expect != result[k/2] {
- t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
+ want := test.text[submatches[k]:submatches[k+1]]
+ if want != result[k/2] {
+ t.Errorf("match %d: got %q, want %q: %s", n, result[k/2], want, test)
return
}
}
@@ -396,23 +396,23 @@ func TestFindStringSubmatch(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
testSubmatchString(&test, 0, test.matches[0], result, t)
}
}
}
-func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
- if len(expect) != len(result) {
- t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
+func testSubmatchIndices(test *FindTest, n int, want, result []int, t *testing.T) {
+ if len(want) != len(result) {
+ t.Errorf("match %d: got %d matches, want %d: %s", n, len(result)/2, len(want)/2, test)
return
}
- for k, e := range expect {
+ for k, e := range want {
if e != result[k] {
- t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
+ t.Errorf("match %d: submatch error: got %v, want %v: %s", n, result, want, test)
}
}
}
@@ -422,9 +422,9 @@ func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %v, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case test.matches != nil && result != nil:
testSubmatchIndices(test, 0, test.matches[0], result, t)
}
@@ -457,11 +457,11 @@ func TestFindAllSubmatch(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
case test.matches != nil && result != nil:
for k, match := range test.matches {
testSubmatchBytes(&test, k, match, result[k], t)
@@ -477,11 +477,11 @@ func TestFindAllStringSubmatch(t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %q, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
case test.matches != nil && result != nil:
for k, match := range test.matches {
testSubmatchString(&test, k, match, result[k], t)
@@ -495,11 +495,11 @@ func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
case test.matches == nil && result == nil:
// ok
case test.matches == nil && result != nil:
- t.Errorf("expected no match; got one: %s", test)
+ t.Errorf("got match %v, want none: %s", result, test)
case test.matches != nil && result == nil:
- t.Errorf("expected match; got none: %s", test)
+ t.Errorf("got no match, want one: %s", test)
case len(test.matches) != len(result):
- t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
+ t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test)
case test.matches != nil && result != nil:
for k, match := range test.matches {
testSubmatchIndices(test, k, match, result[k], t)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index fd79356aba..c08bc7574b 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1213,7 +1213,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if goexperiment.RuntimeSecret && gp.secret > 0 {
// Mark any object allocated while in secret mode as secret.
// This ensures we zero it immediately when freeing it.
- addSecret(x)
+ addSecret(x, size)
}
// Notify sanitizers, if enabled.
diff --git a/src/runtime/malloc_generated.go b/src/runtime/malloc_generated.go
index cf329d2696..2be6a5b6f5 100644
--- a/src/runtime/malloc_generated.go
+++ b/src/runtime/malloc_generated.go
@@ -156,7 +156,7 @@ func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -321,7 +321,7 @@ func mallocgcSmallScanNoHeaderSC2(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -486,7 +486,7 @@ func mallocgcSmallScanNoHeaderSC3(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -651,7 +651,7 @@ func mallocgcSmallScanNoHeaderSC4(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -816,7 +816,7 @@ func mallocgcSmallScanNoHeaderSC5(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -981,7 +981,7 @@ func mallocgcSmallScanNoHeaderSC6(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1146,7 +1146,7 @@ func mallocgcSmallScanNoHeaderSC7(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1311,7 +1311,7 @@ func mallocgcSmallScanNoHeaderSC8(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1476,7 +1476,7 @@ func mallocgcSmallScanNoHeaderSC9(size uintptr, typ *_type, needzero bool) unsaf
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1641,7 +1641,7 @@ func mallocgcSmallScanNoHeaderSC10(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1806,7 +1806,7 @@ func mallocgcSmallScanNoHeaderSC11(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -1971,7 +1971,7 @@ func mallocgcSmallScanNoHeaderSC12(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2136,7 +2136,7 @@ func mallocgcSmallScanNoHeaderSC13(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2301,7 +2301,7 @@ func mallocgcSmallScanNoHeaderSC14(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2466,7 +2466,7 @@ func mallocgcSmallScanNoHeaderSC15(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2631,7 +2631,7 @@ func mallocgcSmallScanNoHeaderSC16(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2796,7 +2796,7 @@ func mallocgcSmallScanNoHeaderSC17(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -2961,7 +2961,7 @@ func mallocgcSmallScanNoHeaderSC18(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3126,7 +3126,7 @@ func mallocgcSmallScanNoHeaderSC19(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3291,7 +3291,7 @@ func mallocgcSmallScanNoHeaderSC20(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3456,7 +3456,7 @@ func mallocgcSmallScanNoHeaderSC21(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3621,7 +3621,7 @@ func mallocgcSmallScanNoHeaderSC22(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3786,7 +3786,7 @@ func mallocgcSmallScanNoHeaderSC23(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -3951,7 +3951,7 @@ func mallocgcSmallScanNoHeaderSC24(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -4116,7 +4116,7 @@ func mallocgcSmallScanNoHeaderSC25(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -4281,7 +4281,7 @@ func mallocgcSmallScanNoHeaderSC26(size uintptr, typ *_type, needzero bool) unsa
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -6686,7 +6686,7 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -6757,7 +6757,7 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -6822,7 +6822,7 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -6893,7 +6893,7 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -6958,7 +6958,7 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7029,7 +7029,7 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7094,7 +7094,7 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7165,7 +7165,7 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7230,7 +7230,7 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7301,7 +7301,7 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7366,7 +7366,7 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7437,7 +7437,7 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7502,7 +7502,7 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7573,7 +7573,7 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7638,7 +7638,7 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7709,7 +7709,7 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7774,7 +7774,7 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7845,7 +7845,7 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7910,7 +7910,7 @@ func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -7981,7 +7981,7 @@ func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8046,7 +8046,7 @@ func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8117,7 +8117,7 @@ func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8182,7 +8182,7 @@ func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8253,7 +8253,7 @@ func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8318,7 +8318,7 @@ func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8389,7 +8389,7 @@ func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8454,7 +8454,7 @@ func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8525,7 +8525,7 @@ func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8590,7 +8590,7 @@ func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8661,7 +8661,7 @@ func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8726,7 +8726,7 @@ func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8797,7 +8797,7 @@ func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8862,7 +8862,7 @@ func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8933,7 +8933,7 @@ func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -8998,7 +8998,7 @@ func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9069,7 +9069,7 @@ func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9134,7 +9134,7 @@ func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9205,7 +9205,7 @@ func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9270,7 +9270,7 @@ func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9341,7 +9341,7 @@ func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9406,7 +9406,7 @@ func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9477,7 +9477,7 @@ func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9542,7 +9542,7 @@ func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9613,7 +9613,7 @@ func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9678,7 +9678,7 @@ func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9749,7 +9749,7 @@ func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9814,7 +9814,7 @@ func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9885,7 +9885,7 @@ func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -9950,7 +9950,7 @@ func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
@@ -10021,7 +10021,7 @@ func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Poi
gp := getg()
if goexperiment.RuntimeSecret && gp.secret > 0 {
- addSecret(x)
+ addSecret(x, size)
}
if valgrindenabled {
diff --git a/src/runtime/malloc_stubs.go b/src/runtime/malloc_stubs.go
index 8c424935bf..b395172e4b 100644
--- a/src/runtime/malloc_stubs.go
+++ b/src/runtime/malloc_stubs.go
@@ -101,7 +101,7 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if goexperiment.RuntimeSecret && gp.secret > 0 {
// Mark any object allocated while in secret mode as secret.
// This ensures we zero it immediately when freeing it.
- addSecret(x)
+ addSecret(x, size)
}
}
diff --git a/src/runtime/mcleanup_test.go b/src/runtime/mcleanup_test.go
index 5afe85e103..dfc688a0f2 100644
--- a/src/runtime/mcleanup_test.go
+++ b/src/runtime/mcleanup_test.go
@@ -331,9 +331,14 @@ func TestCleanupLost(t *testing.T) {
}
wg.Wait()
runtime.GC()
- runtime.BlockUntilEmptyCleanupQueue(int64(10 * time.Second))
+ timeout := 10 * time.Second
+ empty := runtime.BlockUntilEmptyCleanupQueue(int64(timeout))
+ if !empty {
+ t.Errorf("failed to drain cleanup queue within %s", timeout)
+ }
+
if got := int(got.Load()); got != want {
- t.Errorf("expected %d cleanups to be executed, got %d", got, want)
+ t.Errorf("%d cleanups executed, expected %d", got, want)
}
}
diff --git a/src/runtime/metrics_cgo_test.go b/src/runtime/metrics_cgo_test.go
index 6cc9d23195..ef1e3dd71d 100644
--- a/src/runtime/metrics_cgo_test.go
+++ b/src/runtime/metrics_cgo_test.go
@@ -12,7 +12,7 @@ import (
"testing"
)
-func TestNotInGoMetricCallback(t *testing.T) {
+func TestNotInGoMetric(t *testing.T) {
switch runtime.GOOS {
case "windows", "plan9":
t.Skip("unsupported on Windows and Plan9")
@@ -22,11 +22,22 @@ func TestNotInGoMetricCallback(t *testing.T) {
}
}
- // This test is run in a subprocess to prevent other tests from polluting the metrics
- // and because we need to make some cgo callbacks.
- output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback")
- want := "OK\n"
- if output != want {
- t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+ run := func(t *testing.T, name string) {
+ // This test is run in a subprocess to prevent other tests from polluting the metrics
+ // and because we need to make some cgo callbacks.
+ output := runTestProg(t, "testprogcgo", name)
+ want := "OK\n"
+ if output != want {
+ t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want)
+ }
}
+ t.Run("CgoCall", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCall")
+ })
+ t.Run("CgoCallback", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCallback")
+ })
+ t.Run("CgoCallAndCallback", func(t *testing.T) {
+ run(t, "NotInGoMetricCgoCallAndCallback")
+ })
}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 61dc5457fc..68dfca4668 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -2745,6 +2745,14 @@ type specialPinCounter struct {
counter uintptr
}
+// specialSecret tracks whether we need to zero an object immediately
+// upon freeing.
+type specialSecret struct {
+ _ sys.NotInHeap
+ special special
+ size uintptr
+}
+
// specialsIter helps iterate over specials lists.
type specialsIter struct {
pprev **special
@@ -2775,6 +2783,12 @@ func (i *specialsIter) unlinkAndNext() *special {
// freeSpecial performs any cleanup on special s and deallocates it.
// s must already be unlinked from the specials list.
+// TODO(mknyszek): p and size together DO NOT represent a valid allocation.
+// size is the size of the allocation block in the span (mspan.elemsize), and p is
+// whatever pointer the special was attached to, which need not point to the
+// beginning of the block, though it may.
+// Consider passing the arguments differently to avoid giving the impression
+// that p and size together represent an address range.
func freeSpecial(s *special, p unsafe.Pointer, size uintptr) {
switch s.kind {
case _KindSpecialFinalizer:
@@ -2828,7 +2842,19 @@ func freeSpecial(s *special, p unsafe.Pointer, size uintptr) {
mheap_.specialBubbleAlloc.free(unsafe.Pointer(st))
unlock(&mheap_.speciallock)
case _KindSpecialSecret:
- memclrNoHeapPointers(p, size)
+ ss := (*specialSecret)(unsafe.Pointer(s))
+ // p is the actual byte location that the special was
+ // attached to, but the size argument is the span
+ // element size. If we were to zero out using the size
+ // argument, we'd trounce over adjacent memory in cases
+ // where the allocation contains a header. Hence, we use
+ // the user-visible size which we stash in the special itself.
+ //
+ // p always points to the beginning of the user-visible
+ // allocation since the only way to attach a secret special
+ // is via the allocation path. This isn't universal for
+ // tiny allocs, but we avoid them in mallocgc anyway.
+ memclrNoHeapPointers(p, ss.size)
lock(&mheap_.speciallock)
mheap_.specialSecretAlloc.free(unsafe.Pointer(s))
unlock(&mheap_.speciallock)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 5ea96f03f5..005c875cbf 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -2455,8 +2455,16 @@ func needm(signal bool) {
// mp.curg is now a real goroutine.
casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
sched.ngsys.Add(-1)
- // N.B. We do not update nGsyscallNoP, because isExtraInC threads are not
- // counted as real goroutines while they're in C.
+
+ // This is technically inaccurate, but we set isExtraInC to false above,
+ // and so we need to update addGSyscallNoP to keep the two pieces of state
+ // consistent (it's only updated when isExtraInC is false). More specifically,
+ // When we get to cgocallbackg and exitsyscall, we'll be looking for a P, and
+ // since isExtraInC is false, we will decrement this metric.
+ //
+ // The inaccuracy is thankfully transient: only until this thread can get a P.
+ // We're going into Go anyway, so it's okay to pretend we're a real goroutine now.
+ addGSyscallNoP(mp)
if !signal {
if trace.ok() {
@@ -5027,7 +5035,7 @@ func exitsyscallTryGetP(oldp *p) *p {
if oldp != nil {
if thread, ok := setBlockOnExitSyscall(oldp); ok {
thread.takeP()
- addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change.
+ decGSyscallNoP(getg().m) // We got a P for ourselves.
thread.resume()
return oldp
}
diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s
index a7a952664e..93562c5dd0 100644
--- a/src/runtime/rt0_freebsd_arm64.s
+++ b/src/runtime/rt0_freebsd_arm64.s
@@ -4,9 +4,12 @@
#include "textflag.h"
-// On FreeBSD argc/argv are passed in R0, not RSP
+// FreeBSD passes a pointer to the argument block in R0, not RSP,
+// so _rt0_arm64 cannot be used.
TEXT _rt0_arm64_freebsd(SB),NOSPLIT,$0
- JMP _rt0_arm64(SB)
+ ADD $8, R0, R1 // argv (use R0 while it's still the pointer)
+ MOVD 0(R0), R0 // argc
+ JMP runtime·rt0_go(SB)
// When building with -buildmode=c-shared, this symbol is called when the shared
// library is loaded.
diff --git a/src/runtime/secret.go b/src/runtime/secret.go
index 4c199d31d0..8aad63b54f 100644
--- a/src/runtime/secret.go
+++ b/src/runtime/secret.go
@@ -55,15 +55,9 @@ func secret_eraseSecrets() {
// Don't put any code here: the stack frame's contents are gone!
}
-// specialSecret tracks whether we need to zero an object immediately
-// upon freeing.
-type specialSecret struct {
- special special
-}
-
// addSecret records the fact that we need to zero p immediately
// when it is freed.
-func addSecret(p unsafe.Pointer) {
+func addSecret(p unsafe.Pointer, size uintptr) {
// TODO(dmo): figure out the cost of these. These are mostly
// intended to catch allocations that happen via the runtime
// that the user has no control over and not big buffers that user
@@ -72,6 +66,7 @@ func addSecret(p unsafe.Pointer) {
lock(&mheap_.speciallock)
s := (*specialSecret)(mheap_.specialSecretAlloc.alloc())
s.special.kind = _KindSpecialSecret
+ s.size = size
unlock(&mheap_.speciallock)
addspecial(p, &s.special, false)
}
diff --git a/src/runtime/secret/alloc_test.go b/src/runtime/secret/alloc_test.go
new file mode 100644
index 0000000000..8f82dad4b5
--- /dev/null
+++ b/src/runtime/secret/alloc_test.go
@@ -0,0 +1,39 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.runtimesecret && (arm64 || amd64) && linux
+
+package secret_test
+
+import (
+ "runtime"
+ "runtime/secret"
+ "testing"
+)
+
+func TestInterleavedAllocFrees(t *testing.T) {
+ // Interleave heap objects that are kept alive beyond secret.Do
+ // with heap objects that do not live past secret.Do.
+ // The intent is for the clearing of one object (with the wrong size)
+ // to clobber the type header of the next slot. If the GC sees a nil type header
+ // when it expects to find one, it can throw.
+ type T struct {
+ p *int
+ x [1024]byte
+ }
+ for range 10 {
+ var s []*T
+ secret.Do(func() {
+ for i := range 100 {
+ t := &T{}
+ if i%2 == 0 {
+ s = append(s, t)
+ }
+ }
+ })
+ runtime.GC()
+ runtime.GC()
+ runtime.KeepAlive(s)
+ }
+}
diff --git a/src/runtime/secret/doc.go b/src/runtime/secret/doc.go
new file mode 100644
index 0000000000..c0dd4f95a6
--- /dev/null
+++ b/src/runtime/secret/doc.go
@@ -0,0 +1,15 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build goexperiment.runtimesecret
+
+// Package secret contains helper functions for zeroing out memory
+// that is otherwise invisible to a user program in the service of
+// forward secrecy. See https://en.wikipedia.org/wiki/Forward_secrecy for
+// more information.
+//
+// This package (runtime/secret) is experimental,
+// and not subject to the Go 1 compatibility promise.
+// It only exists when building with the GOEXPERIMENT=runtimesecret environment variable set.
+package secret
diff --git a/src/runtime/secret/secret.go b/src/runtime/secret/secret.go
index 9eae22605f..00a03b2d50 100644
--- a/src/runtime/secret/secret.go
+++ b/src/runtime/secret/secret.go
@@ -18,12 +18,23 @@ import (
// entire call tree initiated by f.)
// - Any registers used by f are erased before Do returns.
// - Any stack used by f is erased before Do returns.
-// - Any heap allocation done by f is erased as soon as the garbage
-// collector realizes that it is no longer reachable.
+// - Heap allocations done by f are erased as soon as the garbage
+// collector realizes that all allocated values are no longer reachable.
// - Do works even if f panics or calls runtime.Goexit. As part of
// that, any panic raised by f will appear as if it originates from
// Do itself.
//
+// Users should be cautious of allocating inside Do.
+// Erasing heap memory after Do returns may increase garbage collector sweep times and
+// requires additional memory to keep track of allocations until they are to be erased.
+// These costs can compound when an allocation is done in the service of growing a value,
+// like appending to a slice or inserting into a map. In these cases, the entire new allocation is erased rather
+// than just the secret parts of it.
+//
+// To reduce lifetimes of allocations and avoid unexpected performance issues,
+// if a function invoked by Do needs to yield a result that shouldn't be erased,
+// it should do so by copying the result into an allocation created by the caller.
+//
// Limitations:
// - Currently only supported on linux/amd64 and linux/arm64. On unsupported
// platforms, Do will invoke f directly.
diff --git a/src/runtime/secret/secret_test.go b/src/runtime/secret/secret_test.go
index 98d67cf8a4..e2f78c53a0 100644
--- a/src/runtime/secret/secret_test.go
+++ b/src/runtime/secret/secret_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"time"
"unsafe"
+ "weak"
)
type secretType int64
@@ -63,28 +64,33 @@ func heapSTiny() *secretType {
// are freed.
// See runtime/mheap.go:freeSpecial.
func TestHeap(t *testing.T) {
- var u uintptr
+ var addr uintptr
+ var p weak.Pointer[S]
Do(func() {
- u = uintptr(unsafe.Pointer(heapS()))
+ sp := heapS()
+ addr = uintptr(unsafe.Pointer(sp))
+ p = weak.Make(sp)
})
-
- runtime.GC()
+ waitCollected(t, p)
// Check that object got zeroed.
- checkRangeForSecret(t, u, u+unsafe.Sizeof(S{}))
+ checkRangeForSecret(t, addr, addr+unsafe.Sizeof(S{}))
// Also check our stack, just because we can.
checkStackForSecret(t)
}
func TestHeapTiny(t *testing.T) {
- var u uintptr
+ var addr uintptr
+ var p weak.Pointer[secretType]
Do(func() {
- u = uintptr(unsafe.Pointer(heapSTiny()))
+ sp := heapSTiny()
+ addr = uintptr(unsafe.Pointer(sp))
+ p = weak.Make(sp)
})
- runtime.GC()
+ waitCollected(t, p)
// Check that object got zeroed.
- checkRangeForSecret(t, u, u+unsafe.Sizeof(secretType(0)))
+ checkRangeForSecret(t, addr, addr+unsafe.Sizeof(secretType(0)))
// Also check our stack, just because we can.
checkStackForSecret(t)
}
@@ -240,6 +246,20 @@ func checkRangeForSecret(t *testing.T, lo, hi uintptr) {
}
}
+func waitCollected[P any](t *testing.T, ptr weak.Pointer[P]) {
+ t.Helper()
+ i := 0
+ for ptr.Value() != nil {
+ runtime.GC()
+ i++
+ // 20 seems like a decent number of times to try
+ if i > 20 {
+ t.Fatalf("value was never collected")
+ }
+ }
+ t.Logf("number of cycles until collection: %d", i)
+}
+
func TestRegisters(t *testing.T) {
Do(func() {
s := makeS()
diff --git a/src/runtime/secret_nosecret.go b/src/runtime/secret_nosecret.go
index bf50fb5a54..0692d6bf70 100644
--- a/src/runtime/secret_nosecret.go
+++ b/src/runtime/secret_nosecret.go
@@ -22,9 +22,7 @@ func secret_dec() {}
//go:linkname secret_eraseSecrets runtime/secret.eraseSecrets
func secret_eraseSecrets() {}
-func addSecret(p unsafe.Pointer) {}
-
-type specialSecret struct{}
+func addSecret(p unsafe.Pointer, size uintptr) {}
//go:linkname secret_getStack runtime/secret.getStack
func secret_getStack() (uintptr, uintptr) { return 0, 0 }
diff --git a/src/runtime/testdata/testprogcgo/notingo.go b/src/runtime/testdata/testprogcgo/notingo.go
index 5af4c00e1f..a385ae24d6 100644
--- a/src/runtime/testdata/testprogcgo/notingo.go
+++ b/src/runtime/testdata/testprogcgo/notingo.go
@@ -12,6 +12,7 @@ package main
#include <pthread.h>
extern void Ready();
+extern void BlockForeverInGo();
static _Atomic int spinning;
static _Atomic int released;
@@ -40,6 +41,21 @@ static void Release() {
atomic_store(&spinning, 0);
atomic_store(&released, 1);
}
+
+static void* enterGoThenWait(void* arg __attribute__ ((unused))) {
+ BlockForeverInGo();
+ return NULL;
+}
+
+static void WaitInGoInNewCThread() {
+ pthread_t tid;
+ pthread_create(&tid, NULL, enterGoThenWait, NULL);
+}
+
+static void SpinForever() {
+ atomic_fetch_add(&spinning, 1);
+ while(1) {};
+}
*/
import "C"
@@ -47,15 +63,62 @@ import (
"os"
"runtime"
"runtime/metrics"
+ "sync/atomic"
)
func init() {
- register("NotInGoMetricCallback", NotInGoMetricCallback)
+ register("NotInGoMetricCgoCall", NotInGoMetricCgoCall)
+ register("NotInGoMetricCgoCallback", NotInGoMetricCgoCallback)
+ register("NotInGoMetricCgoCallAndCallback", NotInGoMetricCgoCallAndCallback)
}
-func NotInGoMetricCallback() {
+// NotInGoMetricCgoCall double-checks that N goroutines blocked in cgo calls count as the metric reading N.
+func NotInGoMetricCgoCall() {
const N = 10
+
+ // Spin up the same number of goroutines that will all wait in a cgo call.
+ for range N {
+ go func() {
+ C.SpinForever()
+ }()
+ }
+
+ // Make sure we're all blocked and spinning.
+ for C.Spinning() < N {
+ }
+
+ // Read not-in-go before taking the Ps back.
s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
+ failed := false
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ failed = true; println("pre-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Do something that stops the world to take all the Ps back.
+ //
+ // This will force a re-accounting of some of the goroutines and
+ // re-checking not-in-go will help catch bugs.
+ runtime.ReadMemStats(&m)
+
+ // Read not-in-go.
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ failed = true; println("post-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Fail if we get a bad reading.
+ if failed {
+ os.Exit(2)
+ }
+ println("OK")
+}
+
+// NotInGoMetricCgoCallback tests that threads that called into Go, then returned
+// to C with *no* Go on the stack, are *not* counted as not-in-go in the
+// runtime/metrics package.
+func NotInGoMetricCgoCallback() {
+ const N = 10
// Create N new C threads that have called into Go at least once.
for range N {
@@ -90,6 +153,7 @@ func NotInGoMetricCallback() {
}
// Read not-in-go.
+ s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
metrics.Read(s)
if n := s[0].Value.Uint64(); n != 0 {
println("expected 0 not-in-go goroutines, found", n)
@@ -105,3 +169,69 @@ var readyCh = make(chan bool)
func Ready() {
readyCh <- true
}
+
+// NotInGoMetricCgoCallAndCallback tests that threads that called into Go are not
+// keeping the count of not-in-go threads negative. Specifically, needm sets
+// isExtraInC to false, breaking some of the invariants behind the not-in-go
+// runtime/metrics metric, causing the underlying count to break if we don't
+// account for this. In go.dev/cl/726964 this amounts to nGsyscallNoP being negative.
+// Unfortunately the runtime/metrics package masks a negative nGsyscallNoP because
+// it can transiently go negative due to a race. Therefore, this test checks
+// the condition by making sure not-in-go is positive when we expect it to be.
+// That is, threads in a cgo callback are *not* cancelling out threads in a
+// regular cgo call.
+func NotInGoMetricCgoCallAndCallback() {
+ const N = 10
+
+ // Spin up some threads that will do a cgo callback and just wait in Go.
+ // These threads are the ones we're worried about having the incorrect
+ // accounting that skews the count later.
+ for range N {
+ C.WaitInGoInNewCThread()
+ }
+
+ // Spin up the same number of goroutines that will all wait in a cgo call.
+ for range N {
+ go func() {
+ C.SpinForever()
+ }()
+ }
+
+ // Make sure we're all blocked and spinning.
+ for C.Spinning() < N || blockedForever.Load() < N {
+ }
+
+ // Read not-in-go before taking the Ps back.
+ s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}}
+ failed := false
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ failed = true; println("pre-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Do something that stops the world to take all the Ps back.
+ //
+ // This will force a re-accounting of some of the goroutines and
+ // re-checking not-in-go will help catch bugs.
+ runtime.ReadMemStats(&m)
+
+ // Read not-in-go.
+ metrics.Read(s)
+ if n := s[0].Value.Uint64(); n != N {
+ failed = true; println("post-STW: expected", N, "not-in-go goroutines, found", n)
+ }
+
+ // Fail if we get a bad reading.
+ if failed {
+ os.Exit(2)
+ }
+ println("OK")
+}
+
+var blockedForever atomic.Uint32
+
+//export BlockForeverInGo
+func BlockForeverInGo() {
+ blockedForever.Add(1)
+ select {}
+}
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index a7e8937a05..5f568d205e 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -12,7 +12,7 @@
//
// ## Design
//
-// The basic idea behind the the execution tracer is to have per-M buffers that
+// The basic idea behind the execution tracer is to have per-M buffers that
// trace data may be written into. Each M maintains a write flag indicating whether
// its trace buffer is currently in use.
//
@@ -173,7 +173,7 @@
// doesn't do this directly for performance reasons. The runtime implementation instead caches
// a G on the M created for the C thread. On Linux this M is then cached in the thread's TLS,
// and on other systems, the M is put on a global list on exit from Go. We need to do some
-// extra work to make sure that this is modeled correctly in the the tracer. For example,
+// extra work to make sure that this is modeled correctly in the tracer. For example,
// a C thread exiting Go may leave a P hanging off of its M (whether that M is kept in TLS
// or placed back on a list). In order to correctly model goroutine creation and destruction,
// we must behave as if the P was at some point stolen by the runtime, if the C thread
diff --git a/src/runtime/tracebuf.go b/src/runtime/tracebuf.go
index 5adaede424..1caf69f8b8 100644
--- a/src/runtime/tracebuf.go
+++ b/src/runtime/tracebuf.go
@@ -29,7 +29,7 @@ type traceWriter struct {
*traceBuf
}
-// writer returns an a traceWriter that writes into the current M's stream.
+// writer returns a traceWriter that writes into the current M's stream.
//
// Once this is called, the caller must guard against stack growth until
// end is called on it. Therefore, it's highly recommended to use this
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
index ca4f73c738..dd3a75eb44 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
@@ -30,6 +30,13 @@ func (x simdType) ElemBits() int {
return x.Size / x.Lanes
}
+func (x simdType) Article() string {
+ if strings.HasPrefix(x.Name, "Int") {
+ return "an"
+ }
+ return "a" // Float, Uint
+}
+
// LanesContainer returns the smallest int/uint bit size that is
// large enough to hold one bit for each lane. E.g., Mask32x4
// is 4 lanes, and a uint8 is the smallest uint that has 4 bits.
@@ -86,6 +93,33 @@ func (x simdType) MaskedStoreDoc() string {
}
}
+func (x simdType) ToBitsDoc() string {
+ if x.Size == 512 || x.ElemBits() == 16 {
+ return fmt.Sprintf("// Asm: KMOV%s, CPU Features: AVX512", x.IntelSizeSuffix())
+ }
+ // 128/256 bit vectors with 8, 32, 64 bit elements
+ var asm string
+ var feat string
+ switch x.ElemBits() {
+ case 8:
+ asm = "VPMOVMSKB"
+ if x.Size == 256 {
+ feat = "AVX2"
+ } else {
+ feat = "AVX"
+ }
+ case 32:
+ asm = "VMOVMSKPS"
+ feat = "AVX"
+ case 64:
+ asm = "VMOVMSKPD"
+ feat = "AVX"
+ default:
+ panic("unexpected ElemBits")
+ }
+ return fmt.Sprintf("// Asm: %s, CPU Features: %s", asm, feat)
+}
+
func compareSimdTypes(x, y simdType) int {
// "vreg" then "mask"
if c := -compareNatural(x.Type, y.Type); c != 0 {
@@ -135,7 +169,11 @@ type v{{.}} struct {
{{end}}
{{define "typeTmpl"}}
-// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}
+{{- if eq .Type "mask"}}
+// {{.Name}} is a mask for a SIMD vector of {{.Lanes}} {{.ElemBits}}-bit elements.
+{{- else}}
+// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}s.
+{{- end}}
type {{.Name}} struct {
{{.Fields}}
}
@@ -171,15 +209,15 @@ func (X86Features) {{.Feature}}() bool {
`
const simdLoadStoreTemplate = `
-// Len returns the number of elements in a {{.Name}}
+// Len returns the number of elements in {{.Article}} {{.Name}}.
func (x {{.Name}}) Len() int { return {{.Lanes}} }
-// Load{{.Name}} loads a {{.Name}} from an array
+// Load{{.Name}} loads {{.Article}} {{.Name}} from an array.
//
//go:noescape
func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}}
-// Store stores a {{.Name}} to an array
+// Store stores {{.Article}} {{.Name}} to an array.
//
//go:noescape
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
@@ -199,21 +237,21 @@ func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}
// Only the lower {{.Lanes}} bits of y are used.
{{- end}}
//
-// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512
+{{.ToBitsDoc}}
func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
`
const simdMaskedLoadStoreTemplate = `
-// LoadMasked{{.Name}} loads a {{.Name}} from an array,
-// at those elements enabled by mask
+// LoadMasked{{.Name}} loads {{.Article}} {{.Name}} from an array,
+// at those elements enabled by mask.
//
{{.MaskedLoadDoc}}
//
//go:noescape
func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}}
-// StoreMasked stores a {{.Name}} to an array,
-// at those elements enabled by mask
+// StoreMasked stores {{.Article}} {{.Name}} to an array,
+// at those elements enabled by mask.
//
{{.MaskedStoreDoc}}
//
@@ -395,15 +433,15 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y"
{{end}}
{{define "vectorConversion"}}
-// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}}
-func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}})
+// As{{.Tdst.Name}} returns {{.Tdst.Article}} {{.Tdst.Name}} with the same bit representation as x.
+func (x {{.Tsrc.Name}}) As{{.Tdst.Name}}() {{.Tdst.Name}}
{{end}}
{{define "mask"}}
-// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}
+// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}.
func (from {{.Name}}) To{{.VectorCounterpart}}() (to {{.VectorCounterpart}})
-// asMask converts from {{.VectorCounterpart}} to {{.Name}}
+// asMask converts from {{.VectorCounterpart}} to {{.Name}}.
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}})
func (x {{.Name}}) And(y {{.Name}}) {{.Name}}
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
index 90c3fb620e..7a8823483a 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go
@@ -275,7 +275,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
origArgs = after
}
immArg = "[c] "
- immArgCombineOff = " [makeValAndOff(int32(int8(c)),off)] "
+ immArgCombineOff = " [makeValAndOff(int32(uint8(c)),off)] "
}
memOpData.ArgsLoadAddr = immArg + origArgs + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits)
// Remove the last vreg from the arg and change it to "ptr".
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
index c9d8693aa1..876ffabe3d 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
@@ -13,9 +13,7 @@ import (
)
var (
- ssaTemplates = template.Must(template.New("simdSSA").Parse(`
-{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
-
+ ssaTemplates = template.Must(template.New("simdSSA").Parse(`{{define "header"}}` + generatedHeader + `
package amd64
import (
diff --git a/src/simd/archsimd/_gen/simdgen/godefs.go b/src/simd/archsimd/_gen/simdgen/godefs.go
index 2c10377420..e956c1cd1d 100644
--- a/src/simd/archsimd/_gen/simdgen/godefs.go
+++ b/src/simd/archsimd/_gen/simdgen/godefs.go
@@ -135,6 +135,19 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
o.In = append(o.rawOperation.In, o.rawOperation.InVariant...)
+ // For down conversions, the high elements are zeroed if the result has more elements.
+ // TODO: we should encode this logic in the YAML file, instead of hardcoding it here.
+ if len(o.In) > 0 && len(o.Out) > 0 {
+ inLanes := o.In[0].Lanes
+ outLanes := o.Out[0].Lanes
+ if inLanes != nil && outLanes != nil && *inLanes < *outLanes {
+ if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) &&
+ !strings.Contains(o.Go, "Concat") {
+ o.Documentation += "\n// Results are packed to low elements in the returned vector; its upper elements are zeroed."
+ }
+ }
+ }
+
return nil
}
@@ -362,7 +375,7 @@ func compareNatural(s1, s2 string) int {
return strings.Compare(s1, s2)
}
-const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+const generatedHeader = `// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
`
func writeGoDefs(path string, cl unify.Closure) error {
diff --git a/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml
index 35e8104218..ac5bd825db 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml
@@ -17,21 +17,83 @@
// NAME subtracts corresponding elements of two vectors with saturation.
- go: AddPairs
commutative: false
+ out:
+ - elemBits: 16|32
documentation: !string |-
// NAME horizontally adds adjacent pairs of elements.
- // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+ // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
+- go: AddPairs
+ commutative: false
+ out:
+ - elemBits: 64
+ documentation: !string |-
+ // NAME horizontally adds adjacent pairs of elements.
+ // For x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1].
- go: SubPairs
commutative: false
+ out:
+ - elemBits: 16|32
documentation: !string |-
// NAME horizontally subtracts adjacent pairs of elements.
- // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+ // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
+- go: SubPairs
+ commutative: false
+ out:
+ - elemBits: 64
+ documentation: !string |-
+ // NAME horizontally subtracts adjacent pairs of elements.
+ // For x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1].
- go: AddPairsSaturated
commutative: false
documentation: !string |-
// NAME horizontally adds adjacent pairs of elements with saturation.
- // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+ // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
- go: SubPairsSaturated
commutative: false
documentation: !string |-
// NAME horizontally subtracts adjacent pairs of elements with saturation.
- // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+ // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
+- go: AddPairsGrouped
+ commutative: false
+ out:
+ - elemBits: 16|32
+ documentation: !string |-
+ // NAME horizontally adds adjacent pairs of elements.
+ // Within each 128-bit group:
+ // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
+- go: AddPairsGrouped
+ commutative: false
+ out:
+ - elemBits: 64
+ documentation: !string |-
+ // NAME horizontally adds adjacent pairs of elements.
+ // Within each 128-bit group:
+ // for x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1].
+- go: SubPairsGrouped
+ commutative: false
+ out:
+ - elemBits: 16|32
+ documentation: !string |-
+ // NAME horizontally subtracts adjacent pairs of elements.
+ // Within each 128-bit group:
+ // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
+- go: SubPairsGrouped
+ commutative: false
+ out:
+ - elemBits: 64
+ documentation: !string |-
+ // NAME horizontally subtracts adjacent pairs of elements.
+ // Within each 128-bit group:
+ // for x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1].
+- go: AddPairsSaturatedGrouped
+ commutative: false
+ documentation: !string |-
+ // NAME horizontally adds adjacent pairs of elements with saturation.
+ // Within each 128-bit group:
+ // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
+- go: SubPairsSaturatedGrouped
+ commutative: false
+ documentation: !string |-
+ // NAME horizontally subtracts adjacent pairs of elements with saturation.
+ // Within each 128-bit group:
+ // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
diff --git a/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml
index 4423d8c7c6..17cee597d9 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml
@@ -53,25 +53,71 @@
- *uint
- go: AddPairs
asm: "VPHADD[DW]"
- in: *2any
- out: *1any
+ in: &2any128
+ - &any128
+ go: $t
+ bits: 128
+ - *any128
+ out: &1any128
+ - *any128
- go: SubPairs
asm: "VPHSUB[DW]"
- in: *2any
- out: *1any
+ in: *2any128
+ out: *1any128
- go: AddPairs
asm: "VHADDP[SD]" # floats
- in: *2any
- out: *1any
+ in: *2any128
+ out: *1any128
- go: SubPairs
asm: "VHSUBP[SD]" # floats
- in: *2any
- out: *1any
+ in: *2any128
+ out: *1any128
- go: AddPairsSaturated
asm: "VPHADDS[DW]"
- in: *2int
- out: *1int
+ in: &2int128
+ - &int128
+ go: $t
+ base: int
+ bits: 128
+ - *int128
+ out: &1int128
+ - *int128
- go: SubPairsSaturated
asm: "VPHSUBS[DW]"
- in: *2int
- out: *1int
+ in: *2int128
+ out: *1int128
+- go: AddPairsGrouped
+ asm: "VPHADD[DW]"
+ in: &2any256
+ - &any256
+ go: $t
+ bits: 256
+ - *any256
+ out: &1any256
+ - *any256
+- go: SubPairsGrouped
+ asm: "VPHSUB[DW]"
+ in: *2any256
+ out: *1any256
+- go: AddPairsGrouped
+ asm: "VHADDP[SD]" # floats
+ in: *2any256
+ out: *1any256
+- go: SubPairsGrouped
+ asm: "VHSUBP[SD]" # floats
+ in: *2any256
+ out: *1any256
+- go: AddPairsSaturatedGrouped
+ asm: "VPHADDS[DW]"
+ in: &2int256
+ - &int256
+ go: $t
+ base: int
+ bits: 256
+ - *int256
+ out: &1int256
+ - *int256
+- go: SubPairsSaturatedGrouped
+ asm: "VPHSUBS[DW]"
+ in: *2int256
+ out: *1int256
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml
index 4b639d7a34..97ee587503 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml
@@ -10,34 +10,29 @@
constImm: 0
commutative: true
documentation: !string |-
- // NAME returns x equals y, elementwise.
+ // NAME returns a mask whose elements indicate whether x == y.
- go: Less
constImm: 1
commutative: false
documentation: !string |-
- // NAME returns x less-than y, elementwise.
+ // NAME returns a mask whose elements indicate whether x < y.
- go: LessEqual
constImm: 2
commutative: false
documentation: !string |-
- // NAME returns x less-than-or-equals y, elementwise.
-- go: IsNan # For float only.
- constImm: 3
- commutative: true
- documentation: !string |-
- // NAME checks if elements are NaN. Use as x.IsNan(x).
+ // NAME returns a mask whose elements indicate whether x <= y.
- go: NotEqual
constImm: 4
commutative: true
documentation: !string |-
- // NAME returns x not-equals y, elementwise.
+ // NAME returns a mask whose elements indicate whether x != y.
- go: GreaterEqual
constImm: 13
commutative: false
documentation: !string |-
- // NAME returns x greater-than-or-equals y, elementwise.
+ // NAME returns a mask whose elements indicate whether x >= y.
- go: Greater
constImm: 14
commutative: false
documentation: !string |-
- // NAME returns x greater-than y, elementwise.
+ // NAME returns a mask whose elements indicate whether x > y.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml
index 3f6c8a45b6..6dbfb57343 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml
@@ -121,7 +121,7 @@
- class: mask
# Floats
-- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan
+- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
regexpTag: "compares"
asm: "VCMPP[SD]"
in:
@@ -135,7 +135,7 @@
- go: $t
overwriteBase: int
overwriteClass: mask
-- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)
+- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
regexpTag: "compares"
asm: "VCMPP[SD]"
in:
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
index dd33284063..698e6d9956 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
@@ -44,124 +44,174 @@
// NAME converts element values to float64.
# Int <-> Int conversions
-- go: "(Extend|Saturate|Truncate)?ToInt8"
+- go: "TruncateToInt8"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to int8.
-- go: "(Extend|Saturate|Truncate)?ToInt16(Concat)?"
+ // NAME truncates element values to int8.
+- go: "SaturateToInt8"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to int16.
-- go: "(Extend|Saturate|Truncate)?ToInt32"
+ // NAME converts element values to int8 with signed saturation.
+- go: "ExtendToInt16(Concat)?"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to int32.
-- go: "(Extend|Saturate|Truncate)?ToInt64"
+ // NAME sign-extends element values to int16.
+- go: "TruncateToInt16(Concat)?"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to int64.
-- go: "(Extend|Saturate|Truncate)?ToUint8"
+ // NAME truncates element values to int16.
+- go: "SaturateToInt16(Concat(Grouped)?)?"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to uint8.
-- go: "(Extend|Saturate|Truncate)?ToUint16(Concat)?"
+ // NAME converts element values to int16 with signed saturation.
+- go: "ExtendToInt32"
commutative: false
regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to uint16.
-- go: "(Extend|Saturate|Truncate)?ToUint32"
+ // NAME sign-extends element values to int32.
+- go: "TruncateToInt32"
+ commutative: false
regexpTag: "convert"
+ documentation: !string |-
+ // NAME truncates element values to int32.
+- go: "SaturateToInt32"
commutative: false
+ regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to uint32.
-- go: "(Extend|Saturate|Truncate)?ToUint64"
+ // NAME converts element values to int32 with signed saturation.
+- go: "ExtendToInt64"
+ commutative: false
regexpTag: "convert"
+ documentation: !string |-
+ // NAME sign-extends element values to int64.
+- go: "TruncateToUint8"
commutative: false
+ regexpTag: "convert"
documentation: !string |-
- // NAME converts element values to uint64.
+ // NAME truncates element values to uint8.
+- go: "SaturateToUint8"
+ commutative: false
+ regexpTag: "convert"
+ documentation: !string |-
+ // NAME converts element values to uint8 with unsigned saturation.
+- go: "ExtendToUint16(Concat)?"
+ commutative: false
+ regexpTag: "convert"
+ documentation: !string |-
+ // NAME zero-extends element values to uint16.
+- go: "TruncateToUint16(Concat)?"
+ commutative: false
+ regexpTag: "convert"
+ documentation: !string |-
+ // NAME truncates element values to uint16.
+- go: "SaturateToUint16(Concat(Grouped)?)?"
+ commutative: false
+ regexpTag: "convert"
+ documentation: !string |-
+ // NAME converts element values to uint16 with unsigned saturation.
+- go: "ExtendToUint32"
+ regexpTag: "convert"
+ commutative: false
+ documentation: !string |-
+ // NAME zero-extends element values to uint32.
+- go: "TruncateToUint32"
+ regexpTag: "convert"
+ commutative: false
+ documentation: !string |-
+ // NAME truncates element values to uint32.
+- go: "SaturateToUint32"
+ regexpTag: "convert"
+ commutative: false
+ documentation: !string |-
+ // NAME converts element values to uint32 with unsigned saturation.
+- go: "ExtendToUint64"
+ regexpTag: "convert"
+ commutative: false
+ documentation: !string |-
+ // NAME zero-extends element values to uint64.
# low-part only Int <-> Int conversions
-- go: ExtendLo8ToUint16x8
+- go: ExtendLo8ToUint16
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to uint16.
-- go: ExtendLo8ToInt16x8
+ // NAME zero-extends 8 lowest vector element values to uint16.
+- go: ExtendLo8ToInt16
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to int16.
-- go: ExtendLo4ToUint32x4
+ // NAME sign-extends 8 lowest vector element values to int16.
+- go: ExtendLo4ToUint32
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to uint32.
-- go: ExtendLo4ToInt32x4
+ // NAME zero-extends 4 lowest vector element values to uint32.
+- go: ExtendLo4ToInt32
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to int32.
-- go: ExtendLo2ToUint64x2
+ // NAME sign-extends 4 lowest vector element values to int32.
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+ // NAME zero-extends 2 lowest vector element values to uint64.
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to int64.
-- go: ExtendLo2ToUint64x2
+ // NAME sign-extends 2 lowest vector element values to int64.
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to uint64.
-- go: ExtendLo4ToUint64x4
+ // NAME zero-extends 2 lowest vector element values to uint64.
+- go: ExtendLo4ToUint64
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+ // NAME zero-extends 4 lowest vector element values to uint64.
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to int64.
-- go: ExtendLo4ToInt64x4
+ // NAME sign-extends 2 lowest vector element values to int64.
+- go: ExtendLo4ToInt64
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to int64.
-- go: ExtendLo4ToUint32x4
+ // NAME sign-extends 4 lowest vector element values to int64.
+- go: ExtendLo4ToUint32
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to uint32.
-- go: ExtendLo8ToUint32x8
+ // NAME zero-extends 4 lowest vector element values to uint32.
+- go: ExtendLo8ToUint32
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to uint32.
-- go: ExtendLo4ToInt32x4
+ // NAME zero-extends 8 lowest vector element values to uint32.
+- go: ExtendLo4ToInt32
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to int32.
-- go: ExtendLo8ToInt32x8
+ // NAME sign-extends 4 lowest vector element values to int32.
+- go: ExtendLo8ToInt32
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to int32.
-- go: ExtendLo2ToUint64x2
+ // NAME sign-extends 8 lowest vector element values to int32.
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to uint64.
-- go: ExtendLo4ToUint64x4
+ // NAME zero-extends 2 lowest vector element values to uint64.
+- go: ExtendLo4ToUint64
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to uint64.
-- go: ExtendLo8ToUint64x8
+ // NAME zero-extends 4 lowest vector element values to uint64.
+- go: ExtendLo8ToUint64
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+ // NAME zero-extends 8 lowest vector element values to uint64.
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
- // NAME converts 2 lowest vector element values to int64.
-- go: ExtendLo4ToInt64x4
+ // NAME sign-extends 2 lowest vector element values to int64.
+- go: ExtendLo4ToInt64
commutative: false
documentation: !string |-
- // NAME converts 4 lowest vector element values to int64.
-- go: ExtendLo8ToInt64x8
+ // NAME sign-extends 4 lowest vector element values to int64.
+- go: ExtendLo8ToInt64
commutative: false
documentation: !string |-
- // NAME converts 8 lowest vector element values to int64. \ No newline at end of file
+ // NAME sign-extends 8 lowest vector element values to int64.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
index af058124fb..2f19d12616 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
@@ -138,9 +138,6 @@
# Widening integer conversions.
# uint8 -> uint16
- go: ExtendToUint16
- addDoc: &zeroExtendDoc
- !string |-
- // The result vector's elements are zero-extended.
regexpTag: "convert"
asm: "VPMOVZXBW"
in:
@@ -156,7 +153,6 @@
- go: ExtendToUint16
regexpTag: "convert"
asm: "VPMOVZXBW"
- addDoc: *zeroExtendDoc
in:
- &u8x32
base: uint
@@ -171,9 +167,6 @@
- go: ExtendToInt16
regexpTag: "convert"
asm: "VPMOVSXBW"
- addDoc: &signExtendDoc
- !string |-
- // The result vector's elements are sign-extended.
in:
- &i8x16
base: int
@@ -187,7 +180,6 @@
- go: ExtendToInt16
regexpTag: "convert"
asm: "VPMOVSXBW"
- addDoc: *signExtendDoc
in:
- &i8x32
base: int
@@ -202,7 +194,6 @@
- go: ExtendToUint32
regexpTag: "convert"
asm: "VPMOVZXWD"
- addDoc: *zeroExtendDoc
in:
- &u16x8
base: uint
@@ -216,7 +207,6 @@
- go: ExtendToUint32
regexpTag: "convert"
asm: "VPMOVZXWD"
- addDoc: *zeroExtendDoc
in:
- *u16x16
out:
@@ -228,7 +218,6 @@
- go: ExtendToInt32
regexpTag: "convert"
asm: "VPMOVSXWD"
- addDoc: *signExtendDoc
in:
- &i16x8
base: int
@@ -242,7 +231,6 @@
- go: ExtendToInt32
regexpTag: "convert"
asm: "VPMOVSXWD"
- addDoc: *signExtendDoc
in:
- *i16x16
out:
@@ -254,7 +242,6 @@
- go: ExtendToUint64
regexpTag: "convert"
asm: "VPMOVZXDQ"
- addDoc: *zeroExtendDoc
in:
- &u32x4
base: uint
@@ -268,7 +255,6 @@
- go: ExtendToUint64
regexpTag: "convert"
asm: "VPMOVZXDQ"
- addDoc: *zeroExtendDoc
in:
- *u32x8
out:
@@ -280,7 +266,6 @@
- go: ExtendToInt64
regexpTag: "convert"
asm: "VPMOVSXDQ"
- addDoc: *signExtendDoc
in:
- &i32x4
base: int
@@ -294,7 +279,6 @@
- go: ExtendToInt64
regexpTag: "convert"
asm: "VPMOVSXDQ"
- addDoc: *signExtendDoc
in:
- *i32x8
out:
@@ -306,7 +290,6 @@
- go: ExtendToUint64
regexpTag: "convert"
asm: "VPMOVZXWQ"
- addDoc: *zeroExtendDoc
in:
- *u16x8
out:
@@ -315,7 +298,6 @@
- go: ExtendToInt64
regexpTag: "convert"
asm: "VPMOVSXWQ"
- addDoc: *signExtendDoc
in:
- *i16x8
out:
@@ -324,7 +306,6 @@
- go: ExtendToUint32
regexpTag: "convert"
asm: "VPMOVZXBD"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
@@ -333,7 +314,6 @@
- go: ExtendToInt32
regexpTag: "convert"
asm: "VPMOVSXBD"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
@@ -342,10 +322,6 @@
- go: TruncateToInt8
regexpTag: "convert"
asm: "VPMOV[WDQ]B"
- addDoc: &truncDocZeroUpper
- !string |-
- // Conversion is done with truncation on the vector elements.
- // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
in:
- base: int
out:
@@ -354,7 +330,6 @@
- go: TruncateToUint8
regexpTag: "convert"
asm: "VPMOV[WDQ]B"
- addDoc: *truncDocZeroUpper
in:
- base: uint
out:
@@ -363,9 +338,6 @@
- go: TruncateToInt8
regexpTag: "convert"
asm: "VPMOV[WDQ]B"
- addDoc: &truncDoc
- !string |-
- // Conversion is done with truncation on the vector elements.
in:
- base: int
out:
@@ -374,7 +346,6 @@
- go: TruncateToUint8
regexpTag: "convert"
asm: "VPMOV[WDQ]B"
- addDoc: *truncDoc
in:
- base: uint
out:
@@ -383,7 +354,6 @@
- go: TruncateToInt16
regexpTag: "convert"
asm: "VPMOV[DQ]W"
- addDoc: *truncDoc
in:
- base: int
out:
@@ -391,7 +361,6 @@
- go: TruncateToUint16
regexpTag: "convert"
asm: "VPMOV[DQ]W"
- addDoc: *truncDoc
in:
- base: uint
out:
@@ -399,7 +368,6 @@
- go: TruncateToInt32
regexpTag: "convert"
asm: "VPMOVQD"
- addDoc: *truncDoc
in:
- base: int
out:
@@ -407,7 +375,6 @@
- go: TruncateToUint32
regexpTag: "convert"
asm: "VPMOVQD"
- addDoc: *truncDoc
in:
- base: uint
out:
@@ -416,10 +383,6 @@
- go: SaturateToInt8
regexpTag: "convert"
asm: "VPMOVS[WDQ]B"
- addDoc: &satDocZeroUpper
- !string |-
- // Conversion is done with saturation on the vector elements.
- // Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
in:
- base: int
out:
@@ -427,19 +390,15 @@
bits: 128
- go: SaturateToUint8
regexpTag: "convert"
- asm: "VPMOVS[WDQ]B"
- addDoc: *satDocZeroUpper
+ asm: "VPMOVUS[WDQ]B"
in:
- - base: int
+ - base: uint
out:
- - base: int
+ - base: uint
bits: 128
- go: SaturateToInt8
regexpTag: "convert"
asm: "VPMOVS[WDQ]B"
- addDoc: &satDoc
- !string |-
- // Conversion is done with saturation on the vector elements.
in:
- base: int
out:
@@ -448,7 +407,6 @@
- go: SaturateToUint8
regexpTag: "convert"
asm: "VPMOVUS[WDQ]B"
- addDoc: *satDoc
in:
- base: uint
out:
@@ -457,7 +415,6 @@
- go: SaturateToInt16
regexpTag: "convert"
asm: "VPMOVS[DQ]W"
- addDoc: *satDoc
in:
- base: int
out:
@@ -465,7 +422,6 @@
- go: SaturateToUint16
regexpTag: "convert"
asm: "VPMOVUS[DQ]W"
- addDoc: *satDoc
in:
- base: uint
out:
@@ -473,7 +429,6 @@
- go: SaturateToInt32
regexpTag: "convert"
asm: "VPMOVSQD"
- addDoc: *satDoc
in:
- base: int
out:
@@ -481,7 +436,6 @@
- go: SaturateToUint32
regexpTag: "convert"
asm: "VPMOVUSQD"
- addDoc: *satDoc
in:
- base: uint
out:
@@ -492,67 +446,86 @@
asm: "VPACKSSDW"
addDoc: &satDocConcat
!string |-
+ // The converted elements from x will be packed to the lower part of the result vector,
+ // the converted elements from y will be packed to the upper part of the result vector.
+ in:
+ - base: int
+ - base: int
+ out:
+ - base: int
+ bits: 128
+- go: SaturateToInt16ConcatGrouped
+ regexpTag: "convert"
+ asm: "VPACKSSDW"
+ addDoc: &satDocConcatGrouped
+ !string |-
// With each 128-bit as a group:
- // The converted group from the first input vector will be packed to the lower part of the result vector,
- // the converted group from the second input vector will be packed to the upper part of the result vector.
- // Conversion is done with saturation on the vector elements.
+ // The converted elements from x will be packed to the lower part of the group in the result vector,
+ // the converted elements from y will be packed to the upper part of the group in the result vector.
in:
- base: int
- base: int
out:
- base: int
+ bits: 256|512
- go: SaturateToUint16Concat
regexpTag: "convert"
asm: "VPACKUSDW"
addDoc: *satDocConcat
in:
+ - base: int
+ - base: int
+ out:
- base: uint
- - base: uint
+ bits: 128
+- go: SaturateToUint16ConcatGrouped
+ regexpTag: "convert"
+ asm: "VPACKUSDW"
+ addDoc: *satDocConcatGrouped
+ in:
+ - base: int
+ - base: int
out:
- base: uint
+ bits: 256|512
# low-part only conversions.
# uint8->uint16
-- go: ExtendLo8ToUint16x8
+- go: ExtendLo8ToUint16
regexpTag: "convert"
asm: "VPMOVZXBW"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u16x8
# int8->int16
-- go: ExtendLo8ToInt16x8
+- go: ExtendLo8ToInt16
regexpTag: "convert"
asm: "VPMOVSXBW"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- *i16x8
# uint16->uint32
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
regexpTag: "convert"
asm: "VPMOVZXWD"
- addDoc: *zeroExtendDoc
in:
- *u16x8
out:
- *u32x4
# int16->int32
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
regexpTag: "convert"
asm: "VPMOVSXWD"
- addDoc: *signExtendDoc
in:
- *i16x8
out:
- *i32x4
# uint32 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXDQ"
- addDoc: *zeroExtendDoc
in:
- *u32x4
out:
@@ -561,10 +534,9 @@
elemBits: 64
bits: 128
# int32 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXDQ"
- addDoc: *signExtendDoc
in:
- *i32x4
out:
@@ -573,120 +545,106 @@
elemBits: 64
bits: 128
# uint16 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXWQ"
- addDoc: *zeroExtendDoc
in:
- *u16x8
out:
- *u64x2
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
regexpTag: "convert"
asm: "VPMOVZXWQ"
- addDoc: *zeroExtendDoc
in:
- *u16x8
out:
- *u64x4
# int16 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXWQ"
- addDoc: *signExtendDoc
in:
- *i16x8
out:
- *i64x2
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
regexpTag: "convert"
asm: "VPMOVSXWQ"
- addDoc: *signExtendDoc
in:
- *i16x8
out:
- *i64x4
# uint8 -> uint32
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
regexpTag: "convert"
asm: "VPMOVZXBD"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u32x4
-- go: ExtendLo8ToUint32x8
+- go: ExtendLo8ToUint32
regexpTag: "convert"
asm: "VPMOVZXBD"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u32x8
# int8 -> int32
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
regexpTag: "convert"
asm: "VPMOVSXBD"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- *i32x4
-- go: ExtendLo8ToInt32x8
+- go: ExtendLo8ToInt32
regexpTag: "convert"
asm: "VPMOVSXBD"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- *i32x8
# uint8 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u64x2
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u64x4
-- go: ExtendLo8ToUint64x8
+- go: ExtendLo8ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
- addDoc: *zeroExtendDoc
in:
- *u8x16
out:
- *u64x8
# int8 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- *i64x2
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- *i64x4
-- go: ExtendLo8ToInt64x8
+- go: ExtendLo8ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
- addDoc: *signExtendDoc
in:
- *i8x16
out:
- - *i64x8 \ No newline at end of file
+ - *i64x8
diff --git a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml
index f2d8af6886..90f5208ff7 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml
@@ -18,12 +18,13 @@
- go: Scale
commutative: false
documentation: !string |-
- // NAME multiplies elements by a power of 2.
+ // NAME multiplies each element of x by 2 raised to the power of the
+ // floor of the corresponding element in y.
- go: RoundToEven
commutative: false
constImm: 0
documentation: !string |-
- // NAME rounds elements to the nearest integer.
+ // NAME rounds elements to the nearest integer, rounding ties to even.
- go: RoundToEvenScaled
commutative: false
constImm: 0
diff --git a/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml
index bf33642a11..ae6554d731 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml
@@ -12,8 +12,8 @@
# Applies sign of second operand to first: sign(val, sign_src)
commutative: false
documentation: !string |-
- // NAME returns the product of the first operand with -1, 0, or 1,
- // whichever constant is nearest to the value of the second operand.
+ // NAME returns the product of x with -1, 0, or 1,
+ // whichever constant is nearest to the value of y.
# Sign does not have masked version
- go: OnesCount
commutative: false
diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml
index 2b1da7adaf..54a8ece574 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml
@@ -10,21 +10,10 @@
documentation: !string |-
// NAME multiplies the elements and add the pairs together with saturation,
// yielding a vector of half as many elements with twice the input element size.
-# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
# - go: DotProductBroadcast
# commutative: true
# # documentation: !string |-
# // NAME multiplies all elements and broadcasts the sum.
-- go: DotProductQuadruple
- commutative: false
- documentation: !string |-
- // NAME performs dot products on groups of 4 elements of x and y.
- // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-- go: DotProductQuadrupleSaturated
- commutative: false
- documentation: !string |-
- // NAME multiplies performs dot products on groups of 4 elements of x and y.
- // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction.
- go: AddDotProductPairs
commutative: false
noTypes: "true"
diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml
index 4a1195b52d..18ce8a53b2 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml
@@ -33,33 +33,6 @@
# const: 127
# out:
# - *dpb_src
-- go: DotProductQuadruple
- asm: "VPDPBUSD"
- operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
- in:
- - &qdpa_acc
- go: $t_acc
- base: int
- elemBits: 32
- - &qdpa_src1
- go: $t_src1
- base: uint
- overwriteElementBits: 8
- - &qdpa_src2
- go: $t_src2
- base: int
- overwriteElementBits: 8
- out:
- - *qdpa_acc
-- go: DotProductQuadrupleSaturated
- asm: "VPDPBUSDS"
- operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0
- in:
- - *qdpa_acc
- - *qdpa_src1
- - *qdpa_src2
- out:
- - *qdpa_acc
- go: AddDotProductPairs
asm: "VPDPWSSD"
in:
diff --git a/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml
index a7e30f4693..1d79d85a46 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml
@@ -2,8 +2,8 @@
- go: Max
commutative: true
documentation: !string |-
- // NAME computes the maximum of corresponding elements.
+ // NAME computes the maximum of each pair of corresponding elements in x and y.
- go: Min
commutative: true
documentation: !string |-
- // NAME computes the minimum of corresponding elements.
+ // NAME computes the minimum of each pair of corresponding elements in x and y.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
index 3c86974e8a..38bc9374cc 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
@@ -31,17 +31,23 @@
commutative: false
documentation: !string |-
// NAME performs a full permutation of vector x using indices:
- // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+ //
+ // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+ //
- go: Permute
commutative: false
documentation: !string |-
// NAME performs a full permutation of vector x using indices:
- // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+ //
+ // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+ //
- go: ConcatPermute # ConcatPermute is only available on or after AVX512
commutative: false
documentation: !string |-
// NAME performs a full permutation of vector x, y using indices:
- // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+ //
+ // result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+ //
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
- go: Compress
@@ -236,12 +242,12 @@
- go: ConcatShiftBytesRight
commutative: false
documentation: !string |-
- // NAME concatenates x and y and shift it right by constant bytes.
+ // NAME concatenates x and y and shift it right by shift bytes.
// The result vector will be the lower half of the concatenated vector.
- go: ConcatShiftBytesRightGrouped
commutative: false
documentation: !string |-
- // NAME concatenates x and y and shift it right by constant bytes.
+ // NAME concatenates x and y and shift it right by shift bytes.
// The result vector will be the lower half of the concatenated vector.
// This operation is performed grouped by each 16 byte.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
index 726a983ac4..e1fd184ed7 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
@@ -227,7 +227,7 @@
- go: Permute
asm: "VPERMQ|VPERMPD"
addDoc: !string |-
- // The low 2 bits (values 0-3) of each element of indices is used
+ // The low 2 bits (values 0-3) of each element of indices is used.
operandOrder: "21Type1"
in:
- &anyindices
@@ -244,7 +244,7 @@
- go: Permute
asm: "VPERM[WDQ]|VPERMP[SD]"
addDoc: !string |-
- // The low 3 bits (values 0-7) of each element of indices is used
+ // The low 3 bits (values 0-7) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
@@ -257,7 +257,7 @@
- go: Permute
asm: "VPERM[BWD]|VPERMPS"
addDoc: !string |-
- // The low 4 bits (values 0-15) of each element of indices is used
+ // The low 4 bits (values 0-15) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
@@ -270,7 +270,7 @@
- go: Permute
asm: "VPERM[BW]"
addDoc: !string |-
- // The low 5 bits (values 0-31) of each element of indices is used
+ // The low 5 bits (values 0-31) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
@@ -283,7 +283,7 @@
- go: Permute
asm: "VPERMB"
addDoc: !string |-
- // The low 6 bits (values 0-63) of each element of indices is used
+ // The low 6 bits (values 0-63) of each element of indices is used.
operandOrder: "21Type1"
in:
- *anyindices
@@ -489,7 +489,9 @@
- go: PermuteOrZeroGrouped
asm: VPSHUFB
addDoc: !string |-
- // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+ //
+ // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+ //
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
@@ -506,7 +508,9 @@
- go: permuteScalars
asm: VPSHUFD
addDoc: !string |-
- // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+ //
+ // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+ //
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- *128any
@@ -520,7 +524,9 @@
- go: permuteScalarsGrouped
asm: VPSHUFD
addDoc: !string |-
- // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+ //
+ // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+ //
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
in:
@@ -535,7 +541,9 @@
- go: permuteScalarsLo
asm: VPSHUFLW
addDoc: !string |-
- // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+ //
+ // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+ //
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- &128lanes8
@@ -573,7 +581,9 @@
- go: permuteScalarsHi
asm: VPSHUFHW
addDoc: !string |-
- // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+ //
+ // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+ //
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
in:
- *128lanes8
@@ -1001,6 +1011,7 @@
- *uint128
- class: immediate
immOffset: 0
+ name: shift
out:
- *uint128
@@ -1014,5 +1025,6 @@
- *uint256512
- class: immediate
immOffset: 0
+ name: shift
out:
- *uint256512
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml
index 92491b51d4..bb020ed48f 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml
@@ -7,7 +7,7 @@
commutative: true
documentation: !string |-
// NAME multiplies even-indexed elements, widening the result.
- // Result[i] = v1.Even[i] * v2.Even[i].
+ // Result[i] = v1[2*i] * v2[2*i].
- go: MulHigh
commutative: true
documentation: !string |-
diff --git a/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml
index 0d0b006cfb..0d205aab79 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml
@@ -4,21 +4,21 @@
specialLower: sftimm
commutative: false
documentation: !string |-
- // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+ // NAME shifts each element to the left by y bits.
- go: ShiftAllRight
signed: false
nameAndSizeCheck: true
specialLower: sftimm
commutative: false
documentation: !string |-
- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+ // NAME performs an unsigned right shift on each element by y bits.
- go: ShiftAllRight
signed: true
specialLower: sftimm
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+ // NAME performs a signed right shift on each element by y bits.
- go: shiftAllLeftConst # no APIs, only ssa ops.
noTypes: "true"
noGenericOps: "true"
@@ -44,24 +44,24 @@
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+ // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements.
- go: ShiftRight
signed: false
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+ // NAME performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
- go: ShiftRight
signed: true
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+ // NAME performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
- go: RotateAllLeft
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME rotates each element to the left by the number of bits specified by the immediate.
+ // NAME rotates each element to the left by the number of bits specified by shift.
- go: RotateLeft
nameAndSizeCheck: true
commutative: false
@@ -71,7 +71,7 @@
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME rotates each element to the right by the number of bits specified by the immediate.
+ // NAME rotates each element to the right by the number of bits specified by shift.
- go: RotateRight
nameAndSizeCheck: true
commutative: false
@@ -81,23 +81,23 @@
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element of x to the left by the number of bits specified by the
- // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+ // NAME shifts each element of x to the left by the number of bits specified by
+ // shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
- go: ShiftAllRightConcat
nameAndSizeCheck: true
commutative: false
documentation: !string |-
- // NAME shifts each element of x to the right by the number of bits specified by the
- // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+ // NAME shifts each element of x to the right by the number of bits specified by
+ // shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
- go: ShiftLeftConcat
nameAndSizeCheck: true
commutative: false
documentation: !string |-
// NAME shifts each element of x to the left by the number of bits specified by the
- // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+ // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
- go: ShiftRightConcat
nameAndSizeCheck: true
commutative: false
documentation: !string |-
// NAME shifts each element of x to the right by the number of bits specified by the
- // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+ // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go
index 473e4f14c0..8db185e1e0 100644
--- a/src/simd/archsimd/_gen/tmplgen/main.go
+++ b/src/simd/archsimd/_gen/tmplgen/main.go
@@ -40,17 +40,23 @@ func (sat shapeAndTemplate) target(outType string, width int) shapeAndTemplate {
newSat := sat
newShape := *sat.s
newShape.output = func(t string, w, c int) (ot string, ow int, oc int) {
- return outType, width, c
+ oc = c
+ if width*c > 512 {
+ oc = 512 / width
+ } else if width*c < 128 {
+ oc = 128 / width
+ }
+ return outType, width, oc
}
newSat.s = &newShape
return newSat
}
-func (sat shapeAndTemplate) shrinkTo(outType string, by int) shapeAndTemplate {
+func (sat shapeAndTemplate) targetFixed(outType string, width, count int) shapeAndTemplate {
newSat := sat
newShape := *sat.s
newShape.output = func(t string, w, c int) (ot string, ow int, oc int) {
- return outType, w / by, c * by
+ return outType, width, count
}
newSat.s = &newShape
return newSat
@@ -98,6 +104,17 @@ var uintShapes = &shapes{
uints: []int{8, 16, 32, 64},
}
+var floatShapes = &shapes{
+ vecs: []int{128, 256, 512},
+ floats: []int{32, 64},
+}
+
+var integerShapes = &shapes{
+ vecs: []int{128, 256, 512},
+ ints: []int{8, 16, 32, 64},
+ uints: []int{8, 16, 32, 64},
+}
+
var avx512Shapes = &shapes{
vecs: []int{512},
ints: []int{8, 16, 32, 64},
@@ -278,7 +295,7 @@ func testPrologue(t, s string, out io.Writer) {
fmt.Fprintf(out,
`// Code generated by '%s'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing %s.
// Each function in this file is specialized for a
@@ -311,12 +328,12 @@ func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate {
}
var sliceTemplate = templateOf("slice", `
-// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s
+// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s.
func Load{{.VType}}Slice(s []{{.Etype}}) {{.VType}} {
return Load{{.VType}}((*[{{.Count}}]{{.Etype}})(s))
}
-// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s
+// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s.
func (x {{.VType}}) StoreSlice(s []{{.Etype}}) {
x.Store((*[{{.Count}}]{{.Etype}})(s))
}
@@ -356,15 +373,49 @@ func test{{.VType}}UnaryFlaky(t *testing.T, f func(x archsimd.{{.VType}}) archsi
`)
var convertTemplate = templateOf("convert_helpers", `
-// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
n := {{.Count}}
t.Helper()
forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
t.Helper()
a := archsimd.Load{{.VType}}Slice(x)
- g := make([]{{.OEtype}}, n)
+ g := make([]{{.OEtype}}, {{.OCount}})
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
+ })
+}
+`)
+
+var (
+ // templates and shapes for conversion.
+ // TODO: this includes shapes where in and out have the same element type,
+ // which are not needed.
+ unaryToInt8 = convertTemplate.target("int", 8)
+ unaryToUint8 = convertTemplate.target("uint", 8)
+ unaryToInt16 = convertTemplate.target("int", 16)
+ unaryToUint16 = convertTemplate.target("uint", 16)
+ unaryToInt32 = convertTemplate.target("int", 32)
+ unaryToUint32 = convertTemplate.target("uint", 32)
+ unaryToInt64 = convertTemplate.target("int", 64)
+ unaryToUint64 = convertTemplate.target("uint", 64)
+ unaryToFloat32 = convertTemplate.target("float", 32)
+ unaryToFloat64 = convertTemplate.target("float", 64)
+)
+
+var convertLoTemplate = shapedTemplateOf(integerShapes, "convert_lo_helpers", `
+// test{{.VType}}ConvertLoTo{{.OVType}} tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low {{.OCount}} elements.
+func test{{.VType}}ConvertLoTo{{.OVType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) {
+ n := {{.Count}}
+ t.Helper()
+ forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
+ t.Helper()
+ a := archsimd.Load{{.VType}}Slice(x)
+ g := make([]{{.OEtype}}, {{.OCount}})
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
@@ -372,9 +423,23 @@ func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType
}
`)
-var unaryToInt32 = convertTemplate.target("int", 32)
-var unaryToUint32 = convertTemplate.target("uint", 32)
-var unaryToUint16 = convertTemplate.target("uint", 16)
+var (
+ // templates and shapes for conversion of low elements.
+ // The output is fixed to 128- or 256-bits (no 512-bit, as the
+ // regular convertTemplate covers that).
+ // TODO: this includes shapes where in and out have the same element
+ // type or length, which are not needed.
+ unaryToInt64x2 = convertLoTemplate.targetFixed("int", 64, 2)
+ unaryToInt64x4 = convertLoTemplate.targetFixed("int", 64, 4)
+ unaryToUint64x2 = convertLoTemplate.targetFixed("uint", 64, 2)
+ unaryToUint64x4 = convertLoTemplate.targetFixed("uint", 64, 4)
+ unaryToInt32x4 = convertLoTemplate.targetFixed("int", 32, 4)
+ unaryToInt32x8 = convertLoTemplate.targetFixed("int", 32, 8)
+ unaryToUint32x4 = convertLoTemplate.targetFixed("uint", 32, 4)
+ unaryToUint32x8 = convertLoTemplate.targetFixed("uint", 32, 8)
+ unaryToInt16x8 = convertLoTemplate.targetFixed("int", 16, 8)
+ unaryToUint16x8 = convertLoTemplate.targetFixed("uint", 16, 8)
+)
var binaryTemplate = templateOf("binary_helpers", `
// test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want
@@ -447,6 +512,22 @@ func test{{.VType}}Compare(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsi
}
`)
+var compareUnaryTemplate = shapedTemplateOf(floatShapes, "compare_unary_helpers", `
+// test{{.VType}}UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func test{{.VType}}UnaryCompare(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.Mask{{.WxC}}, want func(x []{{.Etype}}) []int64) {
+ n := {{.Count}}
+ t.Helper()
+ forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool {
+ t.Helper()
+ a := archsimd.Load{{.VType}}Slice(x)
+ g := make([]int{{.EWidth}}, n)
+ f(a).ToInt{{.WxC}}().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)})
+ })
+}
+`)
+
// TODO this has not been tested yet.
var compareMaskedTemplate = templateOf("comparemasked_helpers", `
// test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want
@@ -580,32 +661,32 @@ func (t templateData) CPUfeature() string {
}
var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).ToInt{{.WxC}}()
return y.Greater(x).ToInt{{.WxC}}().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).ToInt{{.WxC}}()
return x.Greater(y).ToInt{{.WxC}}().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
ones := x.Equal(x).ToInt{{.WxC}}()
return x.Equal(y).ToInt{{.WxC}}().Xor(ones).asMask()
@@ -613,18 +694,18 @@ func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
`)
var bitWiseIntTemplate = shapedTemplateOf(intShapes, "bitwise int complement", `
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) Not() {{.VType}} {
return x.Xor(x.Equal(x).ToInt{{.WxC}}())
}
`)
var bitWiseUintTemplate = shapedTemplateOf(uintShapes, "bitwise uint complement", `
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) Not() {{.VType}} {
return x.Xor(x.Equal(x).ToInt{{.WxC}}().As{{.VType}}())
}
@@ -643,9 +724,9 @@ func (t templateData) CPUfeatureAVX2if8() string {
}
var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}}
func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .EWidth 8}}
@@ -657,9 +738,9 @@ func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}}
func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
{{- if eq .EWidth 8}}
@@ -671,9 +752,9 @@ func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}}
func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).ToInt{{.WxC}}()
@@ -685,9 +766,9 @@ func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}}
func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).ToInt{{.WxC}}()
@@ -699,9 +780,9 @@ func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature {{.CPUfeature}}
+// Emulated, CPU Feature: {{.CPUfeature}}
func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} {
a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
ones := x.Equal(x).ToInt{{.WxC}}()
@@ -758,7 +839,7 @@ func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} {
{{- end -}}
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} {
{{- if eq .Base "Int" }}
return y.blendMasked(x, mask)
@@ -789,7 +870,7 @@ var broadcastTemplate = templateOf("Broadcast functions", `
// Broadcast{{.VType}} returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature {{.CPUfeatureBC}}
+// Emulated, CPU Feature: {{.CPUfeatureBC}}
func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
var z {{.As128BitVec }}
return z.SetElem(0, x).Broadcast{{.Vwidth}}()
@@ -804,7 +885,7 @@ func (from {{.Base}}{{.WxC}}) ToMask() (to Mask{{.WxC}}) {
`)
var stringTemplate = shapedTemplateOf(allShapes, "String methods", `
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x {{.VType}}) String() string {
var s [{{.Count}}]{{.Etype}}
x.Store(&s)
@@ -862,7 +943,17 @@ func main() {
one(*ush, unsafePrologue, unsafePATemplate)
}
if *uh != "" {
- one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryToInt32, unaryToUint32, unaryToUint16, unaryFlakyTemplate)
+ one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate,
+ unaryToInt8, unaryToUint8, unaryToInt16, unaryToUint16,
+ unaryToInt32, unaryToUint32, unaryToInt64, unaryToUint64,
+ unaryToFloat32, unaryToFloat64,
+ unaryToInt64x2, unaryToInt64x4,
+ unaryToUint64x2, unaryToUint64x4,
+ unaryToInt32x4, unaryToInt32x8,
+ unaryToUint32x4, unaryToUint32x8,
+ unaryToInt16x8, unaryToUint16x8,
+ unaryFlakyTemplate,
+ )
}
if *bh != "" {
one(*bh, curryTestPrologue("binary simd methods"), binaryTemplate)
@@ -871,7 +962,7 @@ func main() {
one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate, ternaryFlakyTemplate)
}
if *ch != "" {
- one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate)
+ one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate, compareUnaryTemplate)
}
if *cmh != "" {
one(*cmh, curryTestPrologue("simd methods that compare two operands under a mask"), compareMaskedTemplate)
@@ -1018,7 +1109,7 @@ func nonTemplateRewrites(filename string, prologue func(s string, out io.Writer)
out := new(bytes.Buffer)
- prologue("go run genfiles.go", out)
+ prologue("tmplgen", out)
for _, rewrite := range rewrites {
rewrite(out)
}
@@ -1054,7 +1145,7 @@ func one(filename string, prologue func(s string, out io.Writer), sats ...shapeA
out := new(bytes.Buffer)
- prologue("go run genfiles.go", out)
+ prologue("tmplgen", out)
for _, sat := range sats {
sat.forTemplates(out)
}
diff --git a/src/simd/archsimd/compare_gen_amd64.go b/src/simd/archsimd/compare_gen_amd64.go
index a8636f0b33..09f8277dc9 100644
--- a/src/simd/archsimd/compare_gen_amd64.go
+++ b/src/simd/archsimd/compare_gen_amd64.go
@@ -1,278 +1,278 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
//go:build goexperiment.simd
package archsimd
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int8x16) Less(y Int8x16) Mask8x16 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
ones := x.Equal(x).ToInt8x16()
return y.Greater(x).ToInt8x16().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
ones := x.Equal(x).ToInt8x16()
return x.Greater(y).ToInt8x16().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
ones := x.Equal(x).ToInt8x16()
return x.Equal(y).ToInt8x16().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int16x8) Less(y Int16x8) Mask16x8 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
ones := x.Equal(x).ToInt16x8()
return y.Greater(x).ToInt16x8().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
ones := x.Equal(x).ToInt16x8()
return x.Greater(y).ToInt16x8().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
ones := x.Equal(x).ToInt16x8()
return x.Equal(y).ToInt16x8().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int32x4) Less(y Int32x4) Mask32x4 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
ones := x.Equal(x).ToInt32x4()
return y.Greater(x).ToInt32x4().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
ones := x.Equal(x).ToInt32x4()
return x.Greater(y).ToInt32x4().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
ones := x.Equal(x).ToInt32x4()
return x.Equal(y).ToInt32x4().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int64x2) Less(y Int64x2) Mask64x2 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
ones := x.Equal(x).ToInt64x2()
return y.Greater(x).ToInt64x2().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
ones := x.Equal(x).ToInt64x2()
return x.Greater(y).ToInt64x2().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
ones := x.Equal(x).ToInt64x2()
return x.Equal(y).ToInt64x2().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int8x32) Less(y Int8x32) Mask8x32 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
ones := x.Equal(x).ToInt8x32()
return y.Greater(x).ToInt8x32().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
ones := x.Equal(x).ToInt8x32()
return x.Greater(y).ToInt8x32().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
ones := x.Equal(x).ToInt8x32()
return x.Equal(y).ToInt8x32().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int16x16) Less(y Int16x16) Mask16x16 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
ones := x.Equal(x).ToInt16x16()
return y.Greater(x).ToInt16x16().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
ones := x.Equal(x).ToInt16x16()
return x.Greater(y).ToInt16x16().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
ones := x.Equal(x).ToInt16x16()
return x.Equal(y).ToInt16x16().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int32x8) Less(y Int32x8) Mask32x8 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
ones := x.Equal(x).ToInt32x8()
return y.Greater(x).ToInt32x8().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
ones := x.Equal(x).ToInt32x8()
return x.Greater(y).ToInt32x8().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
ones := x.Equal(x).ToInt32x8()
return x.Equal(y).ToInt32x8().Xor(ones).asMask()
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int64x4) Less(y Int64x4) Mask64x4 {
return y.Greater(x)
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
ones := x.Equal(x).ToInt64x4()
return y.Greater(x).ToInt64x4().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
ones := x.Equal(x).ToInt64x4()
return x.Greater(y).ToInt64x4().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
ones := x.Equal(x).ToInt64x4()
return x.Equal(y).ToInt64x4().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
a, b := x.AsInt8x16(), y.AsInt8x16()
signs := BroadcastInt8x16(-1 << (8 - 1))
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
a, b := x.AsInt8x16(), y.AsInt8x16()
signs := BroadcastInt8x16(-1 << (8 - 1))
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
a, b := x.AsInt8x16(), y.AsInt8x16()
ones := x.Equal(x).ToInt8x16()
@@ -280,9 +280,9 @@ func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x16().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
a, b := x.AsInt8x16(), y.AsInt8x16()
ones := x.Equal(x).ToInt8x16()
@@ -290,18 +290,18 @@ func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x16().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
a, b := x.AsInt8x16(), y.AsInt8x16()
ones := x.Equal(x).ToInt8x16()
return a.Equal(b).ToInt8x16().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
a, b := x.AsInt16x8(), y.AsInt16x8()
ones := x.Equal(x).ToInt16x8()
@@ -309,9 +309,9 @@ func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
a, b := x.AsInt16x8(), y.AsInt16x8()
ones := x.Equal(x).ToInt16x8()
@@ -319,9 +319,9 @@ func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
a, b := x.AsInt16x8(), y.AsInt16x8()
ones := x.Equal(x).ToInt16x8()
@@ -329,9 +329,9 @@ func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x8().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
a, b := x.AsInt16x8(), y.AsInt16x8()
ones := x.Equal(x).ToInt16x8()
@@ -339,18 +339,18 @@ func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x8().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
a, b := x.AsInt16x8(), y.AsInt16x8()
ones := x.Equal(x).ToInt16x8()
return a.Equal(b).ToInt16x8().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
a, b := x.AsInt32x4(), y.AsInt32x4()
ones := x.Equal(x).ToInt32x4()
@@ -358,9 +358,9 @@ func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
a, b := x.AsInt32x4(), y.AsInt32x4()
ones := x.Equal(x).ToInt32x4()
@@ -368,9 +368,9 @@ func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
a, b := x.AsInt32x4(), y.AsInt32x4()
ones := x.Equal(x).ToInt32x4()
@@ -378,9 +378,9 @@ func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x4().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
a, b := x.AsInt32x4(), y.AsInt32x4()
ones := x.Equal(x).ToInt32x4()
@@ -388,18 +388,18 @@ func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x4().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
a, b := x.AsInt32x4(), y.AsInt32x4()
ones := x.Equal(x).ToInt32x4()
return a.Equal(b).ToInt32x4().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
a, b := x.AsInt64x2(), y.AsInt64x2()
ones := x.Equal(x).ToInt64x2()
@@ -407,9 +407,9 @@ func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
a, b := x.AsInt64x2(), y.AsInt64x2()
ones := x.Equal(x).ToInt64x2()
@@ -417,9 +417,9 @@ func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
a, b := x.AsInt64x2(), y.AsInt64x2()
ones := x.Equal(x).ToInt64x2()
@@ -427,9 +427,9 @@ func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x2().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
a, b := x.AsInt64x2(), y.AsInt64x2()
ones := x.Equal(x).ToInt64x2()
@@ -437,36 +437,36 @@ func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x2().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
a, b := x.AsInt64x2(), y.AsInt64x2()
ones := x.Equal(x).ToInt64x2()
return a.Equal(b).ToInt64x2().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
a, b := x.AsInt8x32(), y.AsInt8x32()
signs := BroadcastInt8x32(-1 << (8 - 1))
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
a, b := x.AsInt8x32(), y.AsInt8x32()
signs := BroadcastInt8x32(-1 << (8 - 1))
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
a, b := x.AsInt8x32(), y.AsInt8x32()
ones := x.Equal(x).ToInt8x32()
@@ -474,9 +474,9 @@ func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x32().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
a, b := x.AsInt8x32(), y.AsInt8x32()
ones := x.Equal(x).ToInt8x32()
@@ -484,18 +484,18 @@ func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x32().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
a, b := x.AsInt8x32(), y.AsInt8x32()
ones := x.Equal(x).ToInt8x32()
return a.Equal(b).ToInt8x32().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
a, b := x.AsInt16x16(), y.AsInt16x16()
ones := x.Equal(x).ToInt16x16()
@@ -503,9 +503,9 @@ func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
a, b := x.AsInt16x16(), y.AsInt16x16()
ones := x.Equal(x).ToInt16x16()
@@ -513,9 +513,9 @@ func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
a, b := x.AsInt16x16(), y.AsInt16x16()
ones := x.Equal(x).ToInt16x16()
@@ -523,9 +523,9 @@ func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x16().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
a, b := x.AsInt16x16(), y.AsInt16x16()
ones := x.Equal(x).ToInt16x16()
@@ -533,18 +533,18 @@ func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x16().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
a, b := x.AsInt16x16(), y.AsInt16x16()
ones := x.Equal(x).ToInt16x16()
return a.Equal(b).ToInt16x16().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
a, b := x.AsInt32x8(), y.AsInt32x8()
ones := x.Equal(x).ToInt32x8()
@@ -552,9 +552,9 @@ func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
a, b := x.AsInt32x8(), y.AsInt32x8()
ones := x.Equal(x).ToInt32x8()
@@ -562,9 +562,9 @@ func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
a, b := x.AsInt32x8(), y.AsInt32x8()
ones := x.Equal(x).ToInt32x8()
@@ -572,9 +572,9 @@ func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x8().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
a, b := x.AsInt32x8(), y.AsInt32x8()
ones := x.Equal(x).ToInt32x8()
@@ -582,18 +582,18 @@ func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x8().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
a, b := x.AsInt32x8(), y.AsInt32x8()
ones := x.Equal(x).ToInt32x8()
return a.Equal(b).ToInt32x8().Xor(ones).asMask()
}
-// Greater returns a mask whose elements indicate whether x > y
+// Greater returns a mask whose elements indicate whether x > y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
a, b := x.AsInt64x4(), y.AsInt64x4()
ones := x.Equal(x).ToInt64x4()
@@ -601,9 +601,9 @@ func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
return a.Xor(signs).Greater(b.Xor(signs))
}
-// Less returns a mask whose elements indicate whether x < y
+// Less returns a mask whose elements indicate whether x < y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
a, b := x.AsInt64x4(), y.AsInt64x4()
ones := x.Equal(x).ToInt64x4()
@@ -611,9 +611,9 @@ func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
return b.Xor(signs).Greater(a.Xor(signs))
}
-// GreaterEqual returns a mask whose elements indicate whether x >= y
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
a, b := x.AsInt64x4(), y.AsInt64x4()
ones := x.Equal(x).ToInt64x4()
@@ -621,9 +621,9 @@ func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x4().Xor(ones).asMask()
}
-// LessEqual returns a mask whose elements indicate whether x <= y
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
a, b := x.AsInt64x4(), y.AsInt64x4()
ones := x.Equal(x).ToInt64x4()
@@ -631,9 +631,9 @@ func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x4().Xor(ones).asMask()
}
-// NotEqual returns a mask whose elements indicate whether x != y
+// NotEqual returns a mask whose elements indicate whether x != y.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
a, b := x.AsInt64x4(), y.AsInt64x4()
ones := x.Equal(x).ToInt64x4()
diff --git a/src/simd/archsimd/cpu.go b/src/simd/archsimd/cpu.go
index bb0ebbc16a..d0c0ff5426 100644
--- a/src/simd/archsimd/cpu.go
+++ b/src/simd/archsimd/cpu.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
//go:build goexperiment.simd
diff --git a/src/simd/archsimd/extra_amd64.go b/src/simd/archsimd/extra_amd64.go
index 921e148f63..b0dba6d234 100644
--- a/src/simd/archsimd/extra_amd64.go
+++ b/src/simd/archsimd/extra_amd64.go
@@ -19,7 +19,7 @@ func ClearAVXUpperBits()
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int8x16) IsZero() bool
@@ -27,7 +27,7 @@ func (x Int8x16) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int8x32) IsZero() bool
@@ -35,7 +35,7 @@ func (x Int8x32) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int16x8) IsZero() bool
@@ -43,7 +43,7 @@ func (x Int16x8) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int16x16) IsZero() bool
@@ -51,7 +51,7 @@ func (x Int16x16) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int32x4) IsZero() bool
@@ -59,7 +59,7 @@ func (x Int32x4) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int32x8) IsZero() bool
@@ -67,7 +67,7 @@ func (x Int32x8) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int64x2) IsZero() bool
@@ -75,7 +75,7 @@ func (x Int64x2) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Int64x4) IsZero() bool
@@ -83,7 +83,7 @@ func (x Int64x4) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint8x16) IsZero() bool
@@ -91,7 +91,7 @@ func (x Uint8x16) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint8x32) IsZero() bool
@@ -99,7 +99,7 @@ func (x Uint8x32) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint16x8) IsZero() bool
@@ -107,7 +107,7 @@ func (x Uint16x8) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint16x16) IsZero() bool
@@ -115,7 +115,7 @@ func (x Uint16x16) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint32x4) IsZero() bool
@@ -123,7 +123,7 @@ func (x Uint32x4) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint32x8) IsZero() bool
@@ -131,7 +131,7 @@ func (x Uint32x8) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint64x2) IsZero() bool
@@ -139,7 +139,43 @@ func (x Uint64x2) IsZero() bool
// IsZero returns true if all elements of x are zeros.
//
// This method compiles to VPTEST x, x.
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint64x4) IsZero() bool
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) IsNaN() Mask32x4
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) IsNaN() Mask32x8
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) IsNaN() Mask32x16
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNaN() Mask64x2
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNaN() Mask64x4
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) IsNaN() Mask64x8
diff --git a/src/simd/archsimd/internal/simd_test/binary_helpers_test.go b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go
index 9c361dbeb9..c725f657f3 100644
--- a/src/simd/archsimd/internal/simd_test/binary_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go
@@ -1,6 +1,6 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing binary simd methods.
// Each function in this file is specialized for a
diff --git a/src/simd/archsimd/internal/simd_test/binary_test.go b/src/simd/archsimd/internal/simd_test/binary_test.go
index fa2b9511ca..28efdcb52f 100644
--- a/src/simd/archsimd/internal/simd_test/binary_test.go
+++ b/src/simd/archsimd/internal/simd_test/binary_test.go
@@ -17,23 +17,29 @@ func TestAdd(t *testing.T) {
testFloat64x2Binary(t, archsimd.Float64x2.Add, addSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Add, addSlice[float64])
- testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Add, addSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Add, addSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Add, addSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Add, addSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16])
- testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Add, addSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Add, addSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64])
+ testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16])
testUint8x16Binary(t, archsimd.Uint8x16.Add, addSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8])
+ }
+
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Binary(t, archsimd.Float32x16.Add, addSlice[float32])
@@ -55,23 +61,29 @@ func TestSub(t *testing.T) {
testFloat64x2Binary(t, archsimd.Float64x2.Sub, subSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Sub, subSlice[float64])
- testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16])
- testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Sub, subSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32])
+ testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16])
testInt64x2Binary(t, archsimd.Int64x2.Sub, subSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Sub, subSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16])
- testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8])
+ }
+
testUint32x4Binary(t, archsimd.Uint32x4.Sub, subSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32])
+ testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16])
testUint64x2Binary(t, archsimd.Uint64x2.Sub, subSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Sub, subSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Binary(t, archsimd.Float32x16.Sub, subSlice[float32])
@@ -93,10 +105,13 @@ func TestMax(t *testing.T) {
// testFloat64x2Binary(t, archsimd.Float64x2.Max, maxSlice[float64]) // nan is wrong
// testFloat64x4Binary(t, archsimd.Float64x4.Max, maxSlice[float64]) // nan is wrong
- testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Max, maxSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Max, maxSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32])
+
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32])
+ }
if archsimd.X86.AVX512() {
testInt64x2Binary(t, archsimd.Int64x2.Max, maxSlice[int64])
@@ -104,12 +119,18 @@ func TestMax(t *testing.T) {
}
testInt8x16Binary(t, archsimd.Int8x16.Max, maxSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8])
+ }
+
testUint16x8Binary(t, archsimd.Uint16x8.Max, maxSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Max, maxSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32])
+ }
if archsimd.X86.AVX512() {
testUint64x2Binary(t, archsimd.Uint64x2.Max, maxSlice[uint64])
@@ -117,7 +138,10 @@ func TestMax(t *testing.T) {
}
testUint8x16Binary(t, archsimd.Uint8x16.Max, maxSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8])
+ }
if archsimd.X86.AVX512() {
// testFloat32x16Binary(t, archsimd.Float32x16.Max, maxSlice[float32]) // nan is wrong
@@ -139,10 +163,13 @@ func TestMin(t *testing.T) {
// testFloat64x2Binary(t, archsimd.Float64x2.Min, minSlice[float64]) // nan is wrong
// testFloat64x4Binary(t, archsimd.Float64x4.Min, minSlice[float64]) // nan is wrong
- testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Min, minSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Min, minSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32])
+
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32])
+ }
if archsimd.X86.AVX512() {
testInt64x2Binary(t, archsimd.Int64x2.Min, minSlice[int64])
@@ -150,12 +177,18 @@ func TestMin(t *testing.T) {
}
testInt8x16Binary(t, archsimd.Int8x16.Min, minSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8])
+ }
+
testUint16x8Binary(t, archsimd.Uint16x8.Min, minSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Min, minSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32])
+ }
if archsimd.X86.AVX512() {
testUint64x2Binary(t, archsimd.Uint64x2.Min, minSlice[uint64])
@@ -163,7 +196,10 @@ func TestMin(t *testing.T) {
}
testUint8x16Binary(t, archsimd.Uint8x16.Min, minSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8])
+ }
if archsimd.X86.AVX512() {
// testFloat32x16Binary(t, archsimd.Float32x16.Min, minSlice[float32]) // nan is wrong
@@ -180,23 +216,29 @@ func TestMin(t *testing.T) {
}
func TestAnd(t *testing.T) {
- testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.And, andSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.And, andSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.And, andSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.And, andSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8])
+ }
+
testUint16x8Binary(t, archsimd.Uint16x8.And, andSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.And, andSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.And, andSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.And, andSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8])
+ }
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.And, andISlice[int8]) // missing
@@ -211,23 +253,29 @@ func TestAnd(t *testing.T) {
}
func TestAndNot(t *testing.T) {
- testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.AndNot, andNotSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.AndNot, andNotSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.AndNot, andNotSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.AndNot, andNotSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8])
+ }
+
+ testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8])
testUint16x8Binary(t, archsimd.Uint16x8.AndNot, andNotSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.AndNot, andNotSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.AndNot, andNotSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64])
- testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testInt8x64Binary(t, archsimd.Int8x64.AndNot, andNotSlice[int8])
@@ -242,23 +290,29 @@ func TestAndNot(t *testing.T) {
}
func TestXor(t *testing.T) {
- testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Xor, xorSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Xor, xorSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Xor, xorSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Xor, xorSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8])
+ }
+
testUint16x8Binary(t, archsimd.Uint16x8.Xor, xorSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Xor, xorSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Xor, xorSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Xor, xorSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8])
+ }
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.Xor, andISlice[int8]) // missing
@@ -273,23 +327,29 @@ func TestXor(t *testing.T) {
}
func TestOr(t *testing.T) {
- testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Or, orSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Or, orSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32])
testInt64x2Binary(t, archsimd.Int64x2.Or, orSlice[int64])
- testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64])
testInt8x16Binary(t, archsimd.Int8x16.Or, orSlice[int8])
- testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8])
- testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32])
+ testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64])
+ testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8])
+ }
+
testUint16x8Binary(t, archsimd.Uint16x8.Or, orSlice[uint16])
testUint32x4Binary(t, archsimd.Uint32x4.Or, orSlice[uint32])
- testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32])
testUint64x2Binary(t, archsimd.Uint64x2.Or, orSlice[uint64])
- testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64])
testUint8x16Binary(t, archsimd.Uint8x16.Or, orSlice[uint8])
- testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16])
+ testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32])
+ testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64])
+ testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8])
+ }
if archsimd.X86.AVX512() {
// testInt8x64Binary(t, archsimd.Int8x64.Or, andISlice[int8]) // missing
@@ -309,10 +369,13 @@ func TestMul(t *testing.T) {
testFloat64x2Binary(t, archsimd.Float64x2.Mul, mulSlice[float64])
testFloat64x4Binary(t, archsimd.Float64x4.Mul, mulSlice[float64])
- testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16])
testInt16x8Binary(t, archsimd.Int16x8.Mul, mulSlice[int16])
testInt32x4Binary(t, archsimd.Int32x4.Mul, mulSlice[int32])
- testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32])
+
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16])
+ testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32])
+ }
// testInt8x16Binary(t, archsimd.Int8x16.Mul, mulSlice[int8]) // nope
// testInt8x32Binary(t, archsimd.Int8x32.Mul, mulSlice[int8])
diff --git a/src/simd/archsimd/internal/simd_test/compare_helpers_test.go b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go
index 279fdc7155..7a33f0ffa4 100644
--- a/src/simd/archsimd/internal/simd_test/compare_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go
@@ -1,6 +1,6 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing simd methods that compare two operands.
// Each function in this file is specialized for a
@@ -462,3 +462,87 @@ func testFloat64x8Compare(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd
return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y) })
})
}
+
+// testFloat32x4UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat32x4UnaryCompare(t *testing.T, f func(x archsimd.Float32x4) archsimd.Mask32x4, want func(x []float32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]int32, n)
+ f(a).ToInt32x4().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat64x2UnaryCompare(t *testing.T, f func(x archsimd.Float64x2) archsimd.Mask64x2, want func(x []float64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]int64, n)
+ f(a).ToInt64x2().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat32x8UnaryCompare(t *testing.T, f func(x archsimd.Float32x8) archsimd.Mask32x8, want func(x []float32) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]int32, n)
+ f(a).ToInt32x8().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat64x4UnaryCompare(t *testing.T, f func(x archsimd.Float64x4) archsimd.Mask64x4, want func(x []float64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]int64, n)
+ f(a).ToInt64x4().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat32x16UnaryCompare(t *testing.T, f func(x archsimd.Float32x16) archsimd.Mask32x16, want func(x []float32) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]int32, n)
+ f(a).ToInt32x16().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want
+func testFloat64x8UnaryCompare(t *testing.T, f func(x archsimd.Float64x8) archsimd.Mask64x8, want func(x []float64) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]int64, n)
+ f(a).ToInt64x8().StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
diff --git a/src/simd/archsimd/internal/simd_test/compare_test.go b/src/simd/archsimd/internal/simd_test/compare_test.go
index 4485e9bdaa..ea8514ac93 100644
--- a/src/simd/archsimd/internal/simd_test/compare_test.go
+++ b/src/simd/archsimd/internal/simd_test/compare_test.go
@@ -21,32 +21,39 @@ func TestLess(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.Less, lessSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Less, lessSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
- testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
+
+ testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
+ }
+
testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16])
@@ -77,23 +84,29 @@ func TestLessEqual(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.LessEqual, lessEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.LessEqual, lessEqualSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.LessEqual, lessEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.LessEqual, lessEqualSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.LessEqual, lessEqualSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.LessEqual, lessEqualSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8])
+ }
+
testUint16x8Compare(t, archsimd.Uint16x8.LessEqual, lessEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.LessEqual, lessEqualSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.LessEqual, lessEqualSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.LessEqual, lessEqualSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.LessEqual, lessEqualSlice[float32])
@@ -115,25 +128,29 @@ func TestGreater(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.Greater, greaterSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Greater, greaterSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Greater, greaterSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Greater, greaterSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32])
-
testInt64x2Compare(t, archsimd.Int64x2.Greater, greaterSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Greater, greaterSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8])
+ }
+
testUint16x8Compare(t, archsimd.Uint16x8.Greater, greaterSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Greater, greaterSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32])
-
testUint64x2Compare(t, archsimd.Uint64x2.Greater, greaterSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Greater, greaterSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8])
+ }
if archsimd.X86.AVX512() {
@@ -156,23 +173,29 @@ func TestGreaterEqual(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
+ }
+
testUint16x8Compare(t, archsimd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
@@ -194,23 +217,29 @@ func TestEqual(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.Equal, equalSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.Equal, equalSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.Equal, equalSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.Equal, equalSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.Equal, equalSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.Equal, equalSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8])
+ }
+
testUint16x8Compare(t, archsimd.Uint16x8.Equal, equalSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.Equal, equalSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.Equal, equalSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.Equal, equalSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.Equal, equalSlice[float32])
@@ -232,23 +261,29 @@ func TestNotEqual(t *testing.T) {
testFloat64x2Compare(t, archsimd.Float64x2.NotEqual, notEqualSlice[float64])
testFloat64x4Compare(t, archsimd.Float64x4.NotEqual, notEqualSlice[float64])
- testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16])
testInt16x8Compare(t, archsimd.Int16x8.NotEqual, notEqualSlice[int16])
testInt32x4Compare(t, archsimd.Int32x4.NotEqual, notEqualSlice[int32])
- testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32])
testInt64x2Compare(t, archsimd.Int64x2.NotEqual, notEqualSlice[int64])
- testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64])
testInt8x16Compare(t, archsimd.Int8x16.NotEqual, notEqualSlice[int8])
- testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8])
- testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16])
+ if archsimd.X86.AVX2() {
+ testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16])
+ testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32])
+ testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64])
+ testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8])
+ }
+
testUint16x8Compare(t, archsimd.Uint16x8.NotEqual, notEqualSlice[uint16])
testUint32x4Compare(t, archsimd.Uint32x4.NotEqual, notEqualSlice[uint32])
- testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32])
testUint64x2Compare(t, archsimd.Uint64x2.NotEqual, notEqualSlice[uint64])
- testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64])
testUint8x16Compare(t, archsimd.Uint8x16.NotEqual, notEqualSlice[uint8])
- testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8])
+
+ if archsimd.X86.AVX2() {
+ testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16])
+ testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32])
+ testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64])
+ testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8])
+ }
if archsimd.X86.AVX512() {
testFloat32x16Compare(t, archsimd.Float32x16.NotEqual, notEqualSlice[float32])
@@ -263,3 +298,49 @@ func TestNotEqual(t *testing.T) {
testUint64x8Compare(t, archsimd.Uint64x8.NotEqual, notEqualSlice[uint64])
}
}
+
+func TestIsNaN(t *testing.T) {
+ testFloat32x4UnaryCompare(t, archsimd.Float32x4.IsNaN, isNaNSlice[float32])
+ testFloat32x8UnaryCompare(t, archsimd.Float32x8.IsNaN, isNaNSlice[float32])
+ testFloat64x2UnaryCompare(t, archsimd.Float64x2.IsNaN, isNaNSlice[float64])
+ testFloat64x4UnaryCompare(t, archsimd.Float64x4.IsNaN, isNaNSlice[float64])
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16UnaryCompare(t, archsimd.Float32x16.IsNaN, isNaNSlice[float32])
+ testFloat64x8UnaryCompare(t, archsimd.Float64x8.IsNaN, isNaNSlice[float64])
+ }
+
+ // Test x.IsNaN().Or(y.IsNaN()), which is optimized to VCMPP(S|D) $3, x, y.
+ want32 := mapCompare(func(x, y float32) bool { return x != x || y != y })
+ want64 := mapCompare(func(x, y float64) bool { return x != x || y != y })
+ testFloat32x4Compare(t,
+ func(x, y archsimd.Float32x4) archsimd.Mask32x4 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want32)
+ testFloat64x2Compare(t,
+ func(x, y archsimd.Float64x2) archsimd.Mask64x2 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want64)
+
+ if archsimd.X86.AVX2() {
+ testFloat32x8Compare(t,
+ func(x, y archsimd.Float32x8) archsimd.Mask32x8 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want32)
+ testFloat64x4Compare(t,
+ func(x, y archsimd.Float64x4) archsimd.Mask64x4 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want64)
+ }
+
+ if archsimd.X86.AVX512() {
+ testFloat32x16Compare(t,
+ func(x, y archsimd.Float32x16) archsimd.Mask32x16 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want32)
+ testFloat64x8Compare(t,
+ func(x, y archsimd.Float64x8) archsimd.Mask64x8 {
+ return x.IsNaN().Or(y.IsNaN())
+ }, want64)
+ }
+}
diff --git a/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go
index 7ceee652a9..c7197568ed 100644
--- a/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go
@@ -1,6 +1,6 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing simd methods that compare two operands under a mask.
// Each function in this file is specialized for a
diff --git a/src/simd/archsimd/internal/simd_test/generate.go b/src/simd/archsimd/internal/simd_test/generate.go
index e744a5299f..4bc24fea2d 100644
--- a/src/simd/archsimd/internal/simd_test/generate.go
+++ b/src/simd/archsimd/internal/simd_test/generate.go
@@ -7,5 +7,6 @@
package simd
// Invoke code generators.
+// The test helpers are generated by tmplgen from the archsimd package.
-//go:generate go run -C ../.. genfiles.go
+//go:generate go -C ../.. generate
diff --git a/src/simd/archsimd/internal/simd_test/helpers_test.go b/src/simd/archsimd/internal/simd_test/helpers_test.go
index b9d5098dba..d6963586c0 100644
--- a/src/simd/archsimd/internal/simd_test/helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/helpers_test.go
@@ -126,8 +126,22 @@ func map1[T, U any](elem func(x T) U) func(x []T) []U {
}
}
-// map1 returns a function that returns the slice of the results of applying
-// comparison function elem to the respective elements of its two slice inputs.
+// map1n returns a function that returns the slice of the results of applying
+// input parameter elem to the respective elements of its single slice input,
+// extended (with zero values) or truncated to length n.
+func map1n[T, U any](elem func(x T) U, n int) func(x []T) []U {
+ return func(x []T) []U {
+ s := make([]U, n)
+ for i := range min(len(x), n) {
+ s[i] = elem(x[i])
+ }
+ return s
+ }
+}
+
+// mapCompare returns a function that returns the slice of the results of applying
+// comparison function elem to the respective elements of its two slice inputs,
+// and returns -1 if the comparison is true, 0 otherwise.
func mapCompare[T number](elem func(x, y T) bool) func(x, y []T) []int64 {
return func(x, y []T) []int64 {
s := make([]int64, len(x))
@@ -168,12 +182,14 @@ var nzero = -zero
var inf = 1 / zero
var ninf = -1 / zero
var nan = math.NaN()
+var snan32 = math.Float32frombits(0x7f800001)
+var snan64 = math.Float64frombits(0x7ff0000000000001)
// N controls how large the test vectors are
const N = 144
-var float32s = nOf(N, []float32{float32(inf), float32(ninf), 1, float32(nan), float32(zero), 2, float32(nan), float32(zero), 3, float32(-zero), float32(1.0 / zero), float32(-1.0 / zero), 1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 1000, 1.0 / 1000000, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat32, 1 / math.MaxFloat32, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -160, -3200, -64, -4, -8, -16, -32, -64})
-var float64s = nOf(N, []float64{inf, ninf, nan, zero, -zero, 1 / zero, -1 / zero, 0.0001, 0.0000001, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat64, 1.0 / math.MaxFloat64, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -16, -32, -64})
+var float32s = nOf(N, []float32{float32(inf), float32(ninf), 1, float32(nan), snan32, -float32(nan), -snan32, float32(zero), 2, float32(nan), float32(zero), 3, float32(-zero), float32(1.0 / zero), float32(-1.0 / zero), 1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 1000, 1.0 / 1000000, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat32, 1 / math.MaxFloat32, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -160, -3200, -64, -4, -8, -16, -32, -64})
+var float64s = nOf(N, []float64{inf, ninf, nan, snan64, -nan, -snan64, zero, -zero, 1 / zero, -1 / zero, 0.0001, 0.0000001, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat64, 1.0 / math.MaxFloat64, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -16, -32, -64})
var int32s = nOf(N, []int32{1, -1, 0, 2, 4, 8, 1024, 0xffffff, -0xffffff, 0x55555, 0x77777, 0xccccc, -0x55555, -0x77777, -0xccccc, -4, -8, -16, -32, -64})
var uint32s = nOf(N, []uint32{1, 0, 2, 4, 8, 1024, 0xffffff, ^uint32(0xffffff), 0x55555, 0x77777, 0xccccc, ^uint32(0x55555), ^uint32(0x77777), ^uint32(0xccccc)})
diff --git a/src/simd/archsimd/internal/simd_test/simd_test.go b/src/simd/archsimd/internal/simd_test/simd_test.go
index 83925ae789..36bde92455 100644
--- a/src/simd/archsimd/internal/simd_test/simd_test.go
+++ b/src/simd/archsimd/internal/simd_test/simd_test.go
@@ -13,6 +13,7 @@ import (
"simd/archsimd"
"slices"
"testing"
+ "unsafe"
)
func TestMain(m *testing.M) {
@@ -225,6 +226,10 @@ func TestShiftAll(t *testing.T) {
}
func TestSlicesInt8(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
v := archsimd.LoadInt8x32Slice(a)
@@ -258,6 +263,10 @@ func TestSlicesInt8GetElem(t *testing.T) {
}
func TestSlicesInt8TooShortLoad(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
defer func() {
if r := recover(); r != nil {
t.Logf("Saw EXPECTED panic %v", r)
@@ -274,6 +283,10 @@ func TestSlicesInt8TooShortLoad(t *testing.T) {
}
func TestSlicesInt8TooShortStore(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
defer func() {
if r := recover(); r != nil {
t.Logf("Saw EXPECTED panic %v", r)
@@ -303,6 +316,10 @@ func TestSlicesFloat64(t *testing.T) {
// TODO: try to reduce this test to be smaller.
func TestMergeLocals(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
testMergeLocalswrapper(t, archsimd.Int64x4.Add)
}
@@ -362,12 +379,79 @@ func TestBitMaskFromBitsLoad(t *testing.T) {
}
func TestBitMaskToBits(t *testing.T) {
- if !archsimd.X86.AVX512() {
- t.Skip("Test requires X86.AVX512, not available on this hardware")
- return
+ int8s := []int8{
+ 0, 1, 1, 0, 0, 1, 0, 1,
+ 1, 0, 1, 1, 0, 0, 1, 0,
+ 1, 0, 0, 1, 1, 0, 1, 0,
+ 0, 1, 1, 0, 0, 1, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 0, 1, 1, 0, 0, 1,
+ 1, 0, 1, 0, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1,
+ }
+ int16s := make([]int16, 32)
+ for i := range int16s {
+ int16s[i] = int16(int8s[i])
+ }
+ int32s := make([]int32, 16)
+ for i := range int32s {
+ int32s[i] = int32(int8s[i])
+ }
+ int64s := make([]int64, 8)
+ for i := range int64s {
+ int64s[i] = int64(int8s[i])
+ }
+ want64 := uint64(0)
+ for i := range int8s {
+ want64 |= uint64(int8s[i]) << i
+ }
+ want32 := uint32(want64)
+ want16 := uint16(want64)
+ want8 := uint8(want64)
+ want4 := want8 & 0b1111
+ want2 := want4 & 0b11
+
+ if v := archsimd.LoadInt8x16Slice(int8s[:16]).ToMask().ToBits(); v != want16 {
+ t.Errorf("want %b, got %b", want16, v)
+ }
+ if v := archsimd.LoadInt32x4Slice(int32s[:4]).ToMask().ToBits(); v != want4 {
+ t.Errorf("want %b, got %b", want4, v)
+ }
+ if v := archsimd.LoadInt32x8Slice(int32s[:8]).ToMask().ToBits(); v != want8 {
+ t.Errorf("want %b, got %b", want8, v)
+ }
+ if v := archsimd.LoadInt64x2Slice(int64s[:2]).ToMask().ToBits(); v != want2 {
+ t.Errorf("want %b, got %b", want2, v)
+ }
+ if v := archsimd.LoadInt64x4Slice(int64s[:4]).ToMask().ToBits(); v != want4 {
+ t.Errorf("want %b, got %b", want4, v)
}
- if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
- t.Errorf("Want 0b101, got %b", v)
+
+ if archsimd.X86.AVX2() {
+ if v := archsimd.LoadInt8x32Slice(int8s[:32]).ToMask().ToBits(); v != want32 {
+ t.Errorf("want %b, got %b", want32, v)
+ }
+ }
+
+ if archsimd.X86.AVX512() {
+ if v := archsimd.LoadInt8x64Slice(int8s).ToMask().ToBits(); v != want64 {
+ t.Errorf("want %b, got %b", want64, v)
+ }
+ if v := archsimd.LoadInt16x8Slice(int16s[:8]).ToMask().ToBits(); v != want8 {
+ t.Errorf("want %b, got %b", want8, v)
+ }
+ if v := archsimd.LoadInt16x16Slice(int16s[:16]).ToMask().ToBits(); v != want16 {
+ t.Errorf("want %b, got %b", want16, v)
+ }
+ if v := archsimd.LoadInt16x32Slice(int16s).ToMask().ToBits(); v != want32 {
+ t.Errorf("want %b, got %b", want32, v)
+ }
+ if v := archsimd.LoadInt32x16Slice(int32s).ToMask().ToBits(); v != want16 {
+ t.Errorf("want %b, got %b", want16, v)
+ }
+ if v := archsimd.LoadInt64x8Slice(int64s).ToMask().ToBits(); v != want8 {
+ t.Errorf("want %b, got %b", want8, v)
+ }
}
}
@@ -385,6 +469,10 @@ func TestBitMaskToBitsStore(t *testing.T) {
}
func TestMergeFloat(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
k := make([]int64, 4, 4)
s := make([]float64, 4, 4)
@@ -472,6 +560,10 @@ func TestBroadcastUint16x8(t *testing.T) {
}
func TestBroadcastInt8x32(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
s := make([]int8, 32, 32)
archsimd.BroadcastInt8x32(-123).StoreSlice(s)
checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
@@ -1105,6 +1197,10 @@ func TestSelectTernOptInt32x16(t *testing.T) {
}
func TestMaskedMerge(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
@@ -1123,40 +1219,6 @@ func TestMaskedMerge(t *testing.T) {
}
}
-func TestDotProductQuadruple(t *testing.T) {
- if !archsimd.X86.AVXVNNI() {
- t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
- return
- }
- xd := make([]int8, 16)
- yd := make([]uint8, 16)
- zd := make([]int32, 4)
- wanted1 := make([]int32, 4)
- wanted2 := make([]int32, 4)
- res1 := make([]int32, 4)
- res2 := make([]int32, 4)
- for i := range 4 {
- xd[i] = 5
- yd[i] = 6
- zd[i] = 3
- wanted1[i] = 30
- wanted2[i] = 30
- }
- x := archsimd.LoadInt8x16Slice(xd)
- y := archsimd.LoadUint8x16Slice(yd)
- z := archsimd.LoadInt32x4Slice(zd)
- x.DotProductQuadruple(y).StoreSlice(res1)
- x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
- for i := range 4 {
- if res1[i] != wanted1[i] {
- t.Errorf("got %d wanted %d", res1[i], wanted1[i])
- }
- if res2[i] != wanted2[i] {
- t.Errorf("got %d wanted %d", res2[i], wanted2[i])
- }
- }
-}
-
func TestPermuteScalars(t *testing.T) {
x := []int32{11, 12, 13, 14}
want := []int32{12, 13, 14, 11}
@@ -1166,6 +1228,10 @@ func TestPermuteScalars(t *testing.T) {
}
func TestPermuteScalarsGrouped(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
got := make([]int32, 8)
@@ -1190,6 +1256,10 @@ func TestPermuteScalarsLo(t *testing.T) {
}
func TestPermuteScalarsHiGrouped(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
got := make([]int16, len(x))
@@ -1198,6 +1268,10 @@ func TestPermuteScalarsHiGrouped(t *testing.T) {
}
func TestPermuteScalarsLoGrouped(t *testing.T) {
+ if !archsimd.X86.AVX2() {
+ t.Skip("Test requires X86.AVX2, not available on this hardware")
+ return
+ }
x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
got := make([]int16, len(x))
@@ -1222,3 +1296,147 @@ func TestClMul(t *testing.T) {
foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0})
}
+
+func addPairsSlice[T number](a, b []T) []T {
+ r := make([]T, len(a))
+ for i := range len(a) / 2 {
+ r[i] = a[2*i] + a[2*i+1]
+ r[i+len(a)/2] = b[2*i] + b[2*i+1]
+ }
+ return r
+}
+
+func subPairsSlice[T number](a, b []T) []T {
+ r := make([]T, len(a))
+ for i := range len(a) / 2 {
+ r[i] = a[2*i] - a[2*i+1]
+ r[i+len(a)/2] = b[2*i] - b[2*i+1]
+ }
+ return r
+}
+
+func addPairsGroupedSlice[T number](a, b []T) []T {
+ group := int(128 / unsafe.Sizeof(a[0]))
+ r := make([]T, 0, len(a))
+ for i := range len(a) / group {
+ r = append(r, addPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...)
+ }
+ return r
+}
+
+func subPairsGroupedSlice[T number](a, b []T) []T {
+ group := int(128 / unsafe.Sizeof(a[0]))
+ r := make([]T, 0, len(a))
+ for i := range len(a) / group {
+ r = append(r, subPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...)
+ }
+ return r
+}
+
+func TestAddSubPairs(t *testing.T) {
+ testInt16x8Binary(t, archsimd.Int16x8.AddPairs, addPairsSlice[int16])
+ testInt16x8Binary(t, archsimd.Int16x8.SubPairs, subPairsSlice[int16])
+ testUint16x8Binary(t, archsimd.Uint16x8.AddPairs, addPairsSlice[uint16])
+ testUint16x8Binary(t, archsimd.Uint16x8.SubPairs, subPairsSlice[uint16])
+ testInt32x4Binary(t, archsimd.Int32x4.AddPairs, addPairsSlice[int32])
+ testInt32x4Binary(t, archsimd.Int32x4.SubPairs, subPairsSlice[int32])
+ testUint32x4Binary(t, archsimd.Uint32x4.AddPairs, addPairsSlice[uint32])
+ testUint32x4Binary(t, archsimd.Uint32x4.SubPairs, subPairsSlice[uint32])
+ testFloat32x4Binary(t, archsimd.Float32x4.AddPairs, addPairsSlice[float32])
+ testFloat32x4Binary(t, archsimd.Float32x4.SubPairs, subPairsSlice[float32])
+ testFloat64x2Binary(t, archsimd.Float64x2.AddPairs, addPairsSlice[float64])
+ testFloat64x2Binary(t, archsimd.Float64x2.SubPairs, subPairsSlice[float64])
+
+ // Grouped versions
+ if archsimd.X86.AVX2() {
+ testInt16x16Binary(t, archsimd.Int16x16.AddPairsGrouped, addPairsGroupedSlice[int16])
+ testInt16x16Binary(t, archsimd.Int16x16.SubPairsGrouped, subPairsGroupedSlice[int16])
+ testUint16x16Binary(t, archsimd.Uint16x16.AddPairsGrouped, addPairsGroupedSlice[uint16])
+ testUint16x16Binary(t, archsimd.Uint16x16.SubPairsGrouped, subPairsGroupedSlice[uint16])
+ testInt32x8Binary(t, archsimd.Int32x8.AddPairsGrouped, addPairsGroupedSlice[int32])
+ testInt32x8Binary(t, archsimd.Int32x8.SubPairsGrouped, subPairsGroupedSlice[int32])
+ testUint32x8Binary(t, archsimd.Uint32x8.AddPairsGrouped, addPairsGroupedSlice[uint32])
+ testUint32x8Binary(t, archsimd.Uint32x8.SubPairsGrouped, subPairsGroupedSlice[uint32])
+ testFloat32x8Binary(t, archsimd.Float32x8.AddPairsGrouped, addPairsGroupedSlice[float32])
+ testFloat32x8Binary(t, archsimd.Float32x8.SubPairsGrouped, subPairsGroupedSlice[float32])
+ testFloat64x4Binary(t, archsimd.Float64x4.AddPairsGrouped, addPairsGroupedSlice[float64])
+ testFloat64x4Binary(t, archsimd.Float64x4.SubPairsGrouped, subPairsGroupedSlice[float64])
+ }
+}
+
+func convConcatSlice[T, U number](a, b []T, conv func(T) U) []U {
+ r := make([]U, len(a)+len(b))
+ for i, v := range a {
+ r[i] = conv(v)
+ }
+ for i, v := range b {
+ r[len(a)+i] = conv(v)
+ }
+ return r
+}
+
+func convConcatGroupedSlice[T, U number](a, b []T, conv func(T) U) []U {
+ group := int(128 / unsafe.Sizeof(a[0]))
+ r := make([]U, 0, len(a)+len(b))
+ for i := 0; i < len(a)/group; i++ {
+ r = append(r, convConcatSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group], conv)...)
+ }
+ return r
+}
+
+func TestSaturateConcat(t *testing.T) {
+ // Int32x4.SaturateToInt16Concat
+ forSlicePair(t, int32s, 4, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y)
+ var out [8]int16
+ a.SaturateToInt16Concat(b).Store(&out)
+ want := convConcatSlice(x, y, satToInt16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+ // Int32x4.SaturateToUint16Concat
+ forSlicePair(t, int32s, 4, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y)
+ var out [8]uint16
+ a.SaturateToUint16Concat(b).Store(&out)
+ want := convConcatSlice(x, y, satToUint16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+
+ if archsimd.X86.AVX2() {
+ // Int32x8.SaturateToInt16ConcatGrouped
+ forSlicePair(t, int32s, 8, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y)
+ var out [16]int16
+ a.SaturateToInt16ConcatGrouped(b).Store(&out)
+ want := convConcatGroupedSlice(x, y, satToInt16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+ // Int32x8.SaturateToUint16ConcatGrouped
+ forSlicePair(t, int32s, 8, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y)
+ var out [16]uint16
+ a.SaturateToUint16ConcatGrouped(b).Store(&out)
+ want := convConcatGroupedSlice(x, y, satToUint16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+ }
+
+ if archsimd.X86.AVX512() {
+ // Int32x16.SaturateToInt16ConcatGrouped
+ forSlicePair(t, int32s, 16, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y)
+ var out [32]int16
+ a.SaturateToInt16ConcatGrouped(b).Store(&out)
+ want := convConcatGroupedSlice(x, y, satToInt16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+ // Int32x16.SaturateToUint16ConcatGrouped
+ forSlicePair(t, int32s, 16, func(x, y []int32) bool {
+ a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y)
+ var out [32]uint16
+ a.SaturateToUint16ConcatGrouped(b).Store(&out)
+ want := convConcatGroupedSlice(x, y, satToUint16)
+ return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) })
+ })
+ }
+}
diff --git a/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go b/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
index 2f040ffb3e..ac60b6d377 100644
--- a/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go
@@ -29,6 +29,10 @@ func notEqual[T number](x, y T) bool {
return x != y
}
+func isNaN[T float](x T) bool {
+ return x != x
+}
+
func abs[T number](x T) T {
// TODO this will need a non-standard FP-equality test.
if x == 0 { // true if x is -0.
@@ -121,16 +125,6 @@ func toUint64[T number](x T) uint64 {
}
func toUint32[T number](x T) uint32 {
- switch y := (any(x)).(type) {
- case float32:
- if y < 0 || y > float32(math.MaxUint32) || y != y {
- return math.MaxUint32
- }
- case float64:
- if y < 0 || y > float64(math.MaxUint32) || y != y {
- return math.MaxUint32
- }
- }
return uint32(x)
}
@@ -158,6 +152,74 @@ func toFloat64[T number](x T) float64 {
return float64(x)
}
+// X86 specific behavior for conversion from float to int32.
+// If the value cannot be represented as int32, it returns -0x80000000.
+func floatToInt32_x86[T float](x T) int32 {
+ switch y := (any(x)).(type) {
+ case float32:
+ if y != y || y < math.MinInt32 ||
+ y >= math.MaxInt32 { // float32(MaxInt32) == 0x80000000, actually overflows
+ return -0x80000000
+ }
+ case float64:
+ if y != y || y < math.MinInt32 ||
+ y > math.MaxInt32 { // float64(MaxInt32) is exact, no overflow
+ return -0x80000000
+ }
+ }
+ return int32(x)
+}
+
+// X86 specific behavior for conversion from float to int64.
+// If the value cannot be represented as int64, it returns -0x80000000_00000000.
+func floatToInt64_x86[T float](x T) int64 {
+ switch y := (any(x)).(type) {
+ case float32:
+ if y != y || y < math.MinInt64 ||
+ y >= math.MaxInt64 { // float32(MaxInt64) == 0x80000000_00000000, actually overflows
+ return -0x80000000_00000000
+ }
+ case float64:
+ if y != y || y < math.MinInt64 ||
+ y >= math.MaxInt64 { // float64(MaxInt64) == 0x80000000_00000000, also overflows
+ return -0x80000000_00000000
+ }
+ }
+ return int64(x)
+}
+
+// X86 specific behavior for conversion from float to uint32.
+// If the value cannot be represented as uint32, it returns 1<<32 - 1.
+func floatToUint32_x86[T float](x T) uint32 {
+ switch y := (any(x)).(type) {
+ case float32:
+ if y < 0 || y > math.MaxUint32 || y != y {
+ return 1<<32 - 1
+ }
+ case float64:
+ if y < 0 || y > math.MaxUint32 || y != y {
+ return 1<<32 - 1
+ }
+ }
+ return uint32(x)
+}
+
+// X86 specific behavior for conversion from float to uint64.
+// If the value cannot be represented as uint64, it returns 1<<64 - 1.
+func floatToUint64_x86[T float](x T) uint64 {
+ switch y := (any(x)).(type) {
+ case float32:
+ if y < 0 || y > math.MaxUint64 || y != y {
+ return 1<<64 - 1
+ }
+ case float64:
+ if y < 0 || y > math.MaxUint64 || y != y {
+ return 1<<64 - 1
+ }
+ }
+ return uint64(x)
+}
+
func ceilResidueForPrecision[T float](i int) func(T) T {
f := 1.0
for i > 0 {
@@ -241,6 +303,15 @@ func notEqualSlice[T number](x, y []T) []int64 {
return mapCompare[T](notEqual)(x, y)
}
+func isNaNSlice[T float](x []T) []int64 {
+ return map1[T](func(x T) int64 {
+ if isNaN(x) {
+ return -1
+ }
+ return 0
+ })(x)
+}
+
func ceilSlice[T float](x []T) []T {
return map1[T](ceil)(x)
}
@@ -272,3 +343,90 @@ func imaSlice[T integer](x, y, z []T) []T {
func fmaSlice[T float](x, y, z []T) []T {
return map3[T](fma)(x, y, z)
}
+
+func satToInt8[T integer](x T) int8 {
+ var m int8 = -128
+ var M int8 = 127
+ if T(M) < T(m) { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < T(m) {
+ return m
+ }
+ if x > T(M) {
+ return M
+ }
+ return int8(x)
+}
+
+func satToUint8[T integer](x T) uint8 {
+ var M uint8 = 255
+ if T(M) < 0 { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < 0 {
+ return 0
+ }
+ if x > T(M) {
+ return M
+ }
+ return uint8(x)
+}
+
+func satToInt16[T integer](x T) int16 {
+ var m int16 = -32768
+ var M int16 = 32767
+ if T(M) < T(m) { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < T(m) {
+ return m
+ }
+ if x > T(M) {
+ return M
+ }
+ return int16(x)
+}
+
+func satToUint16[T integer](x T) uint16 {
+ var M uint16 = 65535
+ if T(M) < 0 { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < 0 {
+ return 0
+ }
+ if x > T(M) {
+ return M
+ }
+ return uint16(x)
+}
+
+func satToInt32[T integer](x T) int32 {
+ var m int32 = -1 << 31
+ var M int32 = 1<<31 - 1
+ if T(M) < T(m) { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < T(m) {
+ return m
+ }
+ if x > T(M) {
+ return M
+ }
+ return int32(x)
+}
+
+func satToUint32[T integer](x T) uint32 {
+ var M uint32 = 1<<32 - 1
+ if T(M) < 0 { // expecting T to be a larger type
+ panic("bad input type")
+ }
+ if x < 0 {
+ return 0
+ }
+ if x > T(M) {
+ return M
+ }
+ return uint32(x)
+}
diff --git a/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
index c37f9ef0ca..2e25010890 100644
--- a/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go
@@ -1,6 +1,6 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing ternary simd methods.
// Each function in this file is specialized for a
diff --git a/src/simd/archsimd/internal/simd_test/unary_helpers_test.go b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go
index e2610ad98b..5d14c4ff05 100644
--- a/src/simd/archsimd/internal/simd_test/unary_helpers_test.go
+++ b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go
@@ -1,6 +1,6 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
// This file contains functions testing unary simd methods.
// Each function in this file is specialized for a
@@ -433,915 +433,8400 @@ func testFloat64x8Unary(t *testing.T, f func(_ archsimd.Float64x8) archsimd.Floa
})
}
-// testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt8x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToInt8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int8x16, want func(x []int8) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToInt8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int8x16, want func(x []int16) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToInt8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int8x16, want func(x []int32) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToInt8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int8x16, want func(x []int64) []int8) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int8x16, want func(x []uint8) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int8x16, want func(x []uint16) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int8x16, want func(x []uint32) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int8x16, want func(x []uint64) []int8) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToInt8(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int8x16, want func(x []float32) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToInt8(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int8x16, want func(x []float64) []int8) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToInt8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int8x32, want func(x []int8) []int8) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToInt8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int8x16, want func(x []int16) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToInt8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int8x16, want func(x []int32) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToInt8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int8x16, want func(x []int64) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int8x32, want func(x []uint8) []int8) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int8x16, want func(x []uint16) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int8x16, want func(x []uint32) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int8x16, want func(x []uint64) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToInt8(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int8x16, want func(x []float32) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToInt8(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int8x16, want func(x []float64) []int8) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToInt8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int8x64, want func(x []int8) []int8) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int8, 64)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToInt8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int8x32, want func(x []int16) []int8) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToInt8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int8x16, want func(x []int32) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToInt8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int8x16, want func(x []int64) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int8x64, want func(x []uint8) []int8) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int8, 64)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int8x32, want func(x []uint16) []int8) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int8x16, want func(x []uint32) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int8x16, want func(x []uint64) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToInt8(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int8x16, want func(x []float32) []int8) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToInt8(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int8x16, want func(x []float64) []int8) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]int8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToUint8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint8x16, want func(x []int8) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToUint8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint8x16, want func(x []int16) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToUint8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint8x16, want func(x []int32) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToUint8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint8x16, want func(x []int64) []uint8) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint8x16, want func(x []uint8) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint8x16, want func(x []uint16) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint8x16, want func(x []uint32) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint8x16, want func(x []uint64) []uint8) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToUint8(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint8x16, want func(x []float32) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToUint8(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint8x16, want func(x []float64) []uint8) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToUint8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint8x32, want func(x []int8) []uint8) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToUint8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint8x16, want func(x []int16) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToUint8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint8x16, want func(x []int32) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToUint8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint8x16, want func(x []int64) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint8x32, want func(x []uint8) []uint8) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint8x16, want func(x []uint16) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint8x16, want func(x []uint32) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint8x16, want func(x []uint64) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToUint8(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint8x16, want func(x []float32) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToUint8(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint8x16, want func(x []float64) []uint8) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToUint8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint8x64, want func(x []int8) []uint8) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint8, 64)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToUint8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint8x32, want func(x []int16) []uint8) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToUint8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint8x16, want func(x []int32) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToUint8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint8x16, want func(x []int64) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint8x64, want func(x []uint8) []uint8) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint8, 64)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint8x32, want func(x []uint16) []uint8) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint8, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint8x16, want func(x []uint32) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint8x16, want func(x []uint64) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToUint8(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint8x16, want func(x []float32) []uint8) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToUint8(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint8x16, want func(x []float64) []uint8) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]uint8, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToInt16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int16x16, want func(x []int8) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToInt16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int16x8, want func(x []int16) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToInt16(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int16x8, want func(x []int32) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToInt16(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int16x16, want func(x []uint8) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int16x8, want func(x []uint16) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int16x8, want func(x []uint32) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToInt16(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int16x8, want func(x []float32) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToInt16(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int16x8, want func(x []float64) []int16) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToInt16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int16x32, want func(x []int8) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToInt16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int16x16, want func(x []int16) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToInt16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int16x8, want func(x []int32) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToInt16(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int16x32, want func(x []uint8) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int16x16, want func(x []uint16) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int16x8, want func(x []uint32) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToInt16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int16x8, want func(x []float32) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToInt16(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int16x8, want func(x []float64) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToInt16(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int16x32, want func(x []int8) []int16) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToInt16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int16x32, want func(x []int16) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToInt16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int16x16, want func(x []int32) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToInt16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int16x32, want func(x []uint8) []int16) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int16x32, want func(x []uint16) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int16x16, want func(x []uint32) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToInt16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int16x16, want func(x []float32) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]int16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToInt16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int16x8, want func(x []float64) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToUint16(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint16x8, want func(x []int32) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToUint16(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToUint16(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint16x8, want func(x []float32) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToUint16(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint16x8, want func(x []float64) []uint16) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToUint16(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToUint16(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint16x8, want func(x []float64) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToUint16(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint16x32, want func(x []int8) []uint16) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint16x32, want func(x []uint8) []uint16) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint16, 32)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]uint16, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt8x16ConvertToInt32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x16, want func(x []int8) []int32) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
a := archsimd.LoadInt8x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt16x8ConvertToInt32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x4ConvertToInt32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) {
n := 4
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToInt32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x4, want func(x []int64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint8x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x16, want func(x []uint8) []int32) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
a := archsimd.LoadUint8x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint16x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) {
n := 4
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x4, want func(x []uint64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x4ConvertToInt32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int32x4, want func(x []float32) []int32) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToInt32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int32x4, want func(x []float64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToInt32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x16, want func(x []int8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt16x16ConvertToInt32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x16, want func(x []int16) []int32) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x8ConvertToInt32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt64x4ConvertToInt32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) {
n := 4
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
a := archsimd.LoadInt64x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint8x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x16, want func(x []uint8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint16x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x16, want func(x []uint16) []int32) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint64x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) {
n := 4
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
a := archsimd.LoadUint64x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x8ConvertToInt32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int32x8, want func(x []float32) []int32) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat64x4ConvertToInt32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int32x4, want func(x []float64) []int32) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
a := archsimd.LoadFloat64x4Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToInt32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x16, want func(x []int8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToInt32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x16, want func(x []int16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x16ConvertToInt32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x16, want func(x []int32) []int32) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt64x8ConvertToInt32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
a := archsimd.LoadInt64x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x16, want func(x []uint8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x16, want func(x []uint16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x16, want func(x []uint32) []int32) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint64x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
a := archsimd.LoadUint64x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x16ConvertToInt32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int32x16, want func(x []float32) []int32) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x16Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat64x8ConvertToInt32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int32x8, want func(x []float64) []int32) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
a := archsimd.LoadFloat64x8Slice(x)
- g := make([]int32, n)
+ g := make([]int32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt8x16ConvertToUint32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x16, want func(x []int8) []uint32) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
a := archsimd.LoadInt8x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt16x8ConvertToUint32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x4ConvertToUint32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) {
n := 4
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToUint32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x4, want func(x []int64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint8x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x16, want func(x []uint8) []uint32) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
a := archsimd.LoadUint8x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint16x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) {
n := 4
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x4, want func(x []uint64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x4ConvertToUint32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint32x4, want func(x []float32) []uint32) {
n := 4
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToUint32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint32x4, want func(x []float64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToUint32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x16, want func(x []int8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt16x16ConvertToUint32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x16, want func(x []int16) []uint32) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x8ConvertToUint32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt64x4ConvertToUint32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) {
n := 4
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
a := archsimd.LoadInt64x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint8x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x16, want func(x []uint8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint16x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x16, want func(x []uint16) []uint32) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint64x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) {
n := 4
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
a := archsimd.LoadUint64x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x8ConvertToUint32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint32x8, want func(x []float32) []uint32) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat64x4ConvertToUint32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint32x4, want func(x []float64) []uint32) {
n := 4
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
a := archsimd.LoadFloat64x4Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToUint32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x16, want func(x []int8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt16x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToUint32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x16, want func(x []int16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt32x16ConvertToUint32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x16, want func(x []int32) []uint32) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testInt64x8ConvertToUint32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
a := archsimd.LoadInt64x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x16, want func(x []uint8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x16, want func(x []uint16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint32x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x16, want func(x []uint32) []uint32) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testUint64x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
a := archsimd.LoadUint64x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat32x16ConvertToUint32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint32x16, want func(x []float32) []uint32) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x16Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 16)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
+// testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
func testFloat64x8ConvertToUint32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint32x8, want func(x []float64) []uint32) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
a := archsimd.LoadFloat64x8Slice(x)
- g := make([]uint32, n)
+ g := make([]uint32, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) {
+// testInt8x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToInt64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x8, want func(x []int8) []int64) {
n := 16
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
a := archsimd.LoadInt8x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) {
+// testInt16x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToInt64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x8, want func(x []int16) []int64) {
n := 8
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) {
+// testInt32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToInt64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x4, want func(x []int32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToInt64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x2, want func(x []int64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x8, want func(x []uint8) []int64) {
n := 16
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
a := archsimd.LoadUint8x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) {
+// testUint16x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x8, want func(x []uint16) []int64) {
n := 8
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) {
+// testUint32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x4, want func(x []uint32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x2, want func(x []uint64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToInt64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int64x4, want func(x []float32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToInt64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int64x2, want func(x []float64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToInt64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x8, want func(x []int8) []int64) {
n := 32
t.Helper()
forSlice(t, int8s, n, func(x []int8) bool {
t.Helper()
a := archsimd.LoadInt8x32Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) {
+// testInt16x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToInt64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x8, want func(x []int16) []int64) {
n := 16
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) {
+// testInt32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToInt64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x8, want func(x []int32) []int64) {
n := 8
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) {
+// testInt64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToInt64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x4, want func(x []int64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x8, want func(x []uint8) []int64) {
n := 32
t.Helper()
forSlice(t, uint8s, n, func(x []uint8) bool {
t.Helper()
a := archsimd.LoadUint8x32Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) {
+// testUint16x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x8, want func(x []uint16) []int64) {
n := 16
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+// testUint32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x8, want func(x []uint32) []int64) {
n := 8
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) {
+// testUint64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x4, want func(x []uint64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToInt64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int64x8, want func(x []float32) []int64) {
n := 8
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) {
+// testFloat64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToInt64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int64x4, want func(x []float64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToInt64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x8, want func(x []int8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToInt64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x8, want func(x []int16) []int64) {
n := 32
t.Helper()
forSlice(t, int16s, n, func(x []int16) bool {
t.Helper()
a := archsimd.LoadInt16x32Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) {
+// testInt32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToInt64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x8, want func(x []int32) []int64) {
n := 16
t.Helper()
forSlice(t, int32s, n, func(x []int32) bool {
t.Helper()
a := archsimd.LoadInt32x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) {
+// testInt64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToInt64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x8, want func(x []int64) []int64) {
n := 8
t.Helper()
forSlice(t, int64s, n, func(x []int64) bool {
t.Helper()
a := archsimd.LoadInt64x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) {
+// testUint8x64ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x8, want func(x []uint8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x8, want func(x []uint16) []int64) {
n := 32
t.Helper()
forSlice(t, uint16s, n, func(x []uint16) bool {
t.Helper()
a := archsimd.LoadUint16x32Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) {
+// testUint32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x8, want func(x []uint32) []int64) {
n := 16
t.Helper()
forSlice(t, uint32s, n, func(x []uint32) bool {
t.Helper()
a := archsimd.LoadUint32x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+// testUint64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x8, want func(x []uint64) []int64) {
n := 8
t.Helper()
forSlice(t, uint64s, n, func(x []uint64) bool {
t.Helper()
a := archsimd.LoadUint64x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) {
+// testFloat32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToInt64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int64x8, want func(x []float32) []int64) {
n := 16
t.Helper()
forSlice(t, float32s, n, func(x []float32) bool {
t.Helper()
a := archsimd.LoadFloat32x16Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
})
}
-// testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want
-// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width.
-func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) {
+// testFloat64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToInt64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int64x8, want func(x []float64) []int64) {
n := 8
t.Helper()
forSlice(t, float64s, n, func(x []float64) bool {
t.Helper()
a := archsimd.LoadFloat64x8Slice(x)
- g := make([]uint16, n)
+ g := make([]int64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToUint64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x8, want func(x []int8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToUint64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x8, want func(x []int16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToUint64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x4, want func(x []int32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToUint64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x2, want func(x []int64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x8, want func(x []uint8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x8, want func(x []uint16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x4, want func(x []uint32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x2, want func(x []uint64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToUint64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint64x4, want func(x []float32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToUint64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint64x2, want func(x []float64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToUint64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x8, want func(x []int8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToUint64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x8, want func(x []int16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToUint64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x8, want func(x []int32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToUint64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x4, want func(x []int64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x8, want func(x []uint8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x8, want func(x []uint16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x8, want func(x []uint32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x4, want func(x []uint64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToUint64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint64x8, want func(x []float32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToUint64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint64x4, want func(x []float64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToUint64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x8, want func(x []int8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToUint64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x8, want func(x []int16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToUint64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x8, want func(x []int32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToUint64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x8, want func(x []int64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x8, want func(x []uint8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x8, want func(x []uint16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x8, want func(x []uint32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x8, want func(x []uint64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToUint64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint64x8, want func(x []float32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToUint64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint64x8, want func(x []float64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]uint64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Float32x16, want func(x []int8) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Float32x8, want func(x []int16) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Float32x4, want func(x []int32) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Float32x4, want func(x []int64) []float32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ // g has 4 slots because the result vector is widened to 128 bits (Float32x4).
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Float32x16, want func(x []uint8) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Float32x8, want func(x []uint16) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Float32x4, want func(x []uint32) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Float32x4, want func(x []uint64) []float32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ // g has 4 slots because the result vector is widened to 128 bits (Float32x4).
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float32x4, want func(x []float32) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float32x4, want func(x []float64) []float32) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ // g has 4 slots because the result vector is widened to 128 bits (Float32x4).
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Float32x16, want func(x []int8) []float32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Float32x16, want func(x []int16) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Float32x8, want func(x []int32) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Float32x4, want func(x []int64) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Float32x16, want func(x []uint8) []float32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Float32x16, want func(x []uint16) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Float32x8, want func(x []uint32) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Float32x4, want func(x []uint64) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float32x8, want func(x []float32) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float32x4, want func(x []float64) []float32) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]float32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Float32x16, want func(x []int8) []float32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Float32x16, want func(x []int16) []float32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Float32x16, want func(x []int32) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Float32x8, want func(x []int64) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Float32x16, want func(x []uint8) []float32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Float32x16, want func(x []uint16) []float32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Float32x16, want func(x []uint32) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Float32x8, want func(x []uint64) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float32x16, want func(x []float32) []float32) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]float32, 16)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float32x8, want func(x []float64) []float32) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]float32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Float64x8, want func(x []int8) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Float64x8, want func(x []int16) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Float64x4, want func(x []int32) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Float64x2, want func(x []int64) []float64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]float64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Float64x8, want func(x []uint8) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Float64x8, want func(x []uint16) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Float64x4, want func(x []uint32) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Float64x2, want func(x []uint64) []float64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]float64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float64x4, want func(x []float32) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat32x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float64x2, want func(x []float64) []float64) {
+ n := 2
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadFloat64x2Slice(x)
+ g := make([]float64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x32ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Float64x8, want func(x []int8) []float64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Float64x8, want func(x []int16) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Float64x8, want func(x []int32) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Float64x4, want func(x []int64) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ // Run f on a vector loaded from x and compare the stored result
+ // against the reference want(x) exactly (tolerance 0.0), logging x on failure.
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x32ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Float64x8, want func(x []uint8) []float64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Float64x8, want func(x []uint16) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Float64x8, want func(x []uint32) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Float64x4, want func(x []uint64) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float64x8, want func(x []float32) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float64x4, want func(x []float64) []float64) {
+ n := 4
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x4Slice(x)
+ g := make([]float64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt8x64ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Float64x8, want func(x []int8) []float64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt16x32ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Float64x8, want func(x []int16) []float64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Float64x8, want func(x []int32) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testInt64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Float64x8, want func(x []int64) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint8x64ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Float64x8, want func(x []uint8) []float64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint16x32ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Float64x8, want func(x []uint16) []float64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Float64x8, want func(x []uint32) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testUint64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Float64x8, want func(x []uint64) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float64x8, want func(x []float32) []float64) {
+ n := 16
+ t.Helper()
+ forSlice(t, float32s, n, func(x []float32) bool {
+ t.Helper()
+ a := archsimd.LoadFloat32x16Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testFloat64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want.
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width,
+// (extended to at least 128 bits, or truncated to at most 512 bits).
+func testFloat64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float64x8, want func(x []float64) []float64) {
+ n := 8
+ t.Helper()
+ forSlice(t, float64s, n, func(x []float64) bool {
+ t.Helper()
+ a := archsimd.LoadFloat64x8Slice(x)
+ g := make([]float64, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x2, want func(x []int8) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x2, want func(x []int16) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x2, want func(x []int32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x2ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x2, want func(x []int64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x2, want func(x []uint8) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x2, want func(x []uint16) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x2, want func(x []uint32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x2ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x2, want func(x []uint64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x2, want func(x []int8) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x2, want func(x []int16) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x2, want func(x []int32) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x2, want func(x []int64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x2, want func(x []uint8) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x2, want func(x []uint16) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x2, want func(x []uint32) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x2, want func(x []uint64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x64ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x2, want func(x []int8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x2, want func(x []int16) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x2, want func(x []int32) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x2, want func(x []int64) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x64ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x2, want func(x []uint8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x2, want func(x []uint16) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x2, want func(x []uint32) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x2, want func(x []uint64) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x4, want func(x []int8) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x4, want func(x []int16) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x4, want func(x []int32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x2ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x4, want func(x []int64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x4, want func(x []uint8) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x4, want func(x []uint16) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x4, want func(x []uint32) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x2ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x4, want func(x []uint64) []int64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x4, want func(x []int8) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x4, want func(x []int16) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x4, want func(x []int32) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x4, want func(x []int64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x4, want func(x []uint8) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x4, want func(x []uint16) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x4, want func(x []uint32) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x4, want func(x []uint64) []int64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x64ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x4, want func(x []int8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x4, want func(x []int16) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x4, want func(x []int32) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x4, want func(x []int64) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x64ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x4, want func(x []uint8) []int64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x4, want func(x []uint16) []int64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x4, want func(x []uint32) []int64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x4, want func(x []uint64) []int64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x2, want func(x []int8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x2, want func(x []int16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x2, want func(x []int32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x2ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x2, want func(x []int64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x2, want func(x []uint8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x2, want func(x []uint16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x2, want func(x []uint32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x2ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x2, want func(x []uint64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x2, want func(x []int8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x2, want func(x []int16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x2, want func(x []int32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x2, want func(x []int64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x2, want func(x []uint8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x2, want func(x []uint16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x2, want func(x []uint32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x2, want func(x []uint64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt8x64ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x2, want func(x []int8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt16x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x2, want func(x []int16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt32x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x2, want func(x []int32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testInt64x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x2, want func(x []int64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint8x64ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x2, want func(x []uint8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint16x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x2, want func(x []uint16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint32x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x2, want func(x []uint32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 2 elements.
+func testUint64x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x2, want func(x []uint64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint64, 2)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x4, want func(x []int8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x4, want func(x []int16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x4, want func(x []int32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x2ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x4, want func(x []int64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x4, want func(x []uint8) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x4, want func(x []uint16) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x4, want func(x []uint32) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x2ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x4, want func(x []uint64) []uint64) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x4, want func(x []int8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x4, want func(x []int16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x4, want func(x []int32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x4, want func(x []int64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x4, want func(x []uint8) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x4, want func(x []uint16) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x4, want func(x []uint32) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x4, want func(x []uint64) []uint64) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x64ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x4, want func(x []int8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x4, want func(x []int16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x4, want func(x []int32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x4, want func(x []int64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x64ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x4, want func(x []uint8) []uint64) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x4, want func(x []uint16) []uint64) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x4, want func(x []uint32) []uint64) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x4, want func(x []uint64) []uint64) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint64, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x4, want func(x []int8) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x4, want func(x []int16) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x2ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x4, want func(x []int64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x4, want func(x []uint8) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x4, want func(x []uint16) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x2ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x4, want func(x []uint64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x4, want func(x []int8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x4, want func(x []int16) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x4, want func(x []int32) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x4, want func(x []uint8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x4, want func(x []uint16) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x4, want func(x []uint32) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x64ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x4, want func(x []int8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x4, want func(x []int16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x4, want func(x []int32) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x4, want func(x []int64) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x64ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x4, want func(x []uint8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x4, want func(x []uint16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x4, want func(x []uint32) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x4, want func(x []uint64) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x8, want func(x []int8) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x8, want func(x []int32) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x2ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x8, want func(x []int64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x8, want func(x []uint8) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x8, want func(x []uint32) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x2ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x8, want func(x []uint64) []int32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x8, want func(x []int8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x8, want func(x []int16) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x8, want func(x []int64) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x8, want func(x []uint8) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x8, want func(x []uint16) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x8, want func(x []uint64) []int32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x64ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x8, want func(x []int8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x8, want func(x []int16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x8, want func(x []int32) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x64ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x8, want func(x []uint8) []int32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x8, want func(x []uint16) []int32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x8, want func(x []uint32) []int32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x4, want func(x []int8) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x4, want func(x []int16) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x2ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x4, want func(x []int64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x4, want func(x []uint8) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x4, want func(x []uint16) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x2ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x4, want func(x []uint64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x4, want func(x []int8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x4, want func(x []int16) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x4, want func(x []int32) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x4, want func(x []uint8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x4, want func(x []uint16) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x4, want func(x []uint32) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt8x64ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x4, want func(x []int8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt16x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x4, want func(x []int16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt32x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x4, want func(x []int32) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testInt64x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x4, want func(x []int64) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint8x64ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x4, want func(x []uint8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint16x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x4, want func(x []uint16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint32x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x4, want func(x []uint32) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 4 elements.
+func testUint64x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x4, want func(x []uint64) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint32, 4)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x8, want func(x []int8) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x8, want func(x []int32) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x2ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x8, want func(x []int64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x8, want func(x []uint8) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x8, want func(x []uint32) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x2ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x8, want func(x []uint64) []uint32) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x8, want func(x []int8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x8, want func(x []int16) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x8, want func(x []int64) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x8, want func(x []uint8) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x8, want func(x []uint16) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x8, want func(x []uint64) []uint32) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x64ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x8, want func(x []int8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x8, want func(x []int16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x8, want func(x []int32) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x64ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x8, want func(x []uint8) []uint32) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x8, want func(x []uint16) []uint32) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x8, want func(x []uint32) []uint32) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint32, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int16x8, want func(x []int8) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int16x8, want func(x []int16) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int16x8, want func(x []int32) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x2ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int16x8, want func(x []uint8) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int16x8, want func(x []uint16) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int16x8, want func(x []uint32) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x2ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int16x8, want func(x []int8) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int16x8, want func(x []int16) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int16x8, want func(x []int32) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int16x8, want func(x []uint8) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int16x8, want func(x []uint16) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int16x8, want func(x []uint32) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x64ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int16x8, want func(x []int8) []int16) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int16x8, want func(x []int16) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int16x8, want func(x []int32) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int16x8, want func(x []int64) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x64ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int16x8, want func(x []uint8) []int16) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int16x8, want func(x []uint16) []int16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int16x8, want func(x []uint32) []int16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int16x8, want func(x []uint64) []int16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]int16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x8, want func(x []int8) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint16x8, want func(x []int32) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x2ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x2ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 2
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x2Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x8, want func(x []uint8) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x2ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x2ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 2
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x2Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x8, want func(x []int8) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x32Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x8, want func(x []int16) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x8, want func(x []uint8) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x32Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x8, want func(x []uint16) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 4
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x4Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt8x64ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt8x64ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint16x8, want func(x []int8) []uint16) {
+ n := 64
+ t.Helper()
+ forSlice(t, int8s, n, func(x []int8) bool {
+ t.Helper()
+ a := archsimd.LoadInt8x64Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt16x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt16x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x8, want func(x []int16) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, int16s, n, func(x []int16) bool {
+ t.Helper()
+ a := archsimd.LoadInt16x32Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt32x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt32x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x8, want func(x []int32) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, int32s, n, func(x []int32) bool {
+ t.Helper()
+ a := archsimd.LoadInt32x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testInt64x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testInt64x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, int64s, n, func(x []int64) bool {
+ t.Helper()
+ a := archsimd.LoadInt64x8Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint8x64ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint8x64ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint16x8, want func(x []uint8) []uint16) {
+ n := 64
+ t.Helper()
+ forSlice(t, uint8s, n, func(x []uint8) bool {
+ t.Helper()
+ a := archsimd.LoadUint8x64Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint16x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint16x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x8, want func(x []uint16) []uint16) {
+ n := 32
+ t.Helper()
+ forSlice(t, uint16s, n, func(x []uint16) bool {
+ t.Helper()
+ a := archsimd.LoadUint16x32Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint32x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint32x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x8, want func(x []uint32) []uint16) {
+ n := 16
+ t.Helper()
+ forSlice(t, uint32s, n, func(x []uint32) bool {
+ t.Helper()
+ a := archsimd.LoadUint32x16Slice(x)
+ g := make([]uint16, 8)
+ f(a).StoreSlice(g)
+ w := want(x)
+ return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
+ })
+}
+
+// testUint64x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want.
+// This converts only the low 8 elements.
+func testUint64x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) {
+ n := 8
+ t.Helper()
+ forSlice(t, uint64s, n, func(x []uint64) bool {
+ t.Helper()
+ a := archsimd.LoadUint64x8Slice(x)
+ g := make([]uint16, 8)
f(a).StoreSlice(g)
w := want(x)
return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
diff --git a/src/simd/archsimd/internal/simd_test/unary_test.go b/src/simd/archsimd/internal/simd_test/unary_test.go
index 6b53669d78..097feb60ed 100644
--- a/src/simd/archsimd/internal/simd_test/unary_test.go
+++ b/src/simd/archsimd/internal/simd_test/unary_test.go
@@ -69,20 +69,26 @@ func TestSqrt(t *testing.T) {
func TestNot(t *testing.T) {
testInt8x16Unary(t, archsimd.Int8x16.Not, map1[int8](not))
- testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not))
testInt16x8Unary(t, archsimd.Int16x8.Not, map1[int16](not))
- testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not))
testInt32x4Unary(t, archsimd.Int32x4.Not, map1[int32](not))
- testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not))
+
+ if archsimd.X86.AVX2() {
+ testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not))
+ testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not))
+ testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not))
+ }
}
func TestAbsolute(t *testing.T) {
testInt8x16Unary(t, archsimd.Int8x16.Abs, map1[int8](abs))
- testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs))
testInt16x8Unary(t, archsimd.Int16x8.Abs, map1[int16](abs))
- testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs))
testInt32x4Unary(t, archsimd.Int32x4.Abs, map1[int32](abs))
- testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs))
+
+ if archsimd.X86.AVX2() {
+ testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs))
+ testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs))
+ testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs))
+ }
if archsimd.X86.AVX512() {
testInt8x64Unary(t, archsimd.Int8x64.Abs, map1[int8](abs))
testInt16x32Unary(t, archsimd.Int16x32.Abs, map1[int16](abs))
@@ -110,28 +116,199 @@ func TestCeilScaledResidue(t *testing.T) {
map1[float64](func(x float64) float64 { return x - math.Ceil(x) }))
}
-func TestToUint32(t *testing.T) {
- if !archsimd.X86.AVX512() {
- t.Skip("Needs AVX512")
+func TestConvert(t *testing.T) {
+ testFloat64x2ConvertToFloat32(t, archsimd.Float64x2.ConvertToFloat32, map1n[float64](toFloat32, 4))
+ testFloat64x4ConvertToFloat32(t, archsimd.Float64x4.ConvertToFloat32, map1[float64](toFloat32))
+ testFloat32x4ConvertToFloat64(t, archsimd.Float32x4.ConvertToFloat64, map1[float32](toFloat64))
+
+ testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](floatToInt32_x86))
+ testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](floatToInt32_x86))
+ testFloat64x2ConvertToInt32(t, archsimd.Float64x2.ConvertToInt32, map1n[float64](floatToInt32_x86, 4))
+ testFloat64x4ConvertToInt32(t, archsimd.Float64x4.ConvertToInt32, map1[float64](floatToInt32_x86))
+
+ testInt32x4ConvertToFloat32(t, archsimd.Int32x4.ConvertToFloat32, map1[int32](toFloat32))
+ testInt32x8ConvertToFloat32(t, archsimd.Int32x8.ConvertToFloat32, map1[int32](toFloat32))
+ testInt32x4ConvertToFloat64(t, archsimd.Int32x4.ConvertToFloat64, map1[int32](toFloat64))
+
+ if archsimd.X86.AVX512() {
+ testFloat32x8ConvertToFloat64(t, archsimd.Float32x8.ConvertToFloat64, map1[float32](toFloat64))
+ testFloat64x8ConvertToFloat32(t, archsimd.Float64x8.ConvertToFloat32, map1[float64](toFloat32))
+
+ testFloat32x16ConvertToInt32(t, archsimd.Float32x16.ConvertToInt32, map1[float32](floatToInt32_x86))
+ testFloat64x8ConvertToInt32(t, archsimd.Float64x8.ConvertToInt32, map1[float64](floatToInt32_x86))
+ testFloat32x4ConvertToInt64(t, archsimd.Float32x4.ConvertToInt64, map1[float32](floatToInt64_x86))
+ testFloat32x8ConvertToInt64(t, archsimd.Float32x8.ConvertToInt64, map1[float32](floatToInt64_x86))
+ testFloat64x2ConvertToInt64(t, archsimd.Float64x2.ConvertToInt64, map1[float64](floatToInt64_x86))
+ testFloat64x4ConvertToInt64(t, archsimd.Float64x4.ConvertToInt64, map1[float64](floatToInt64_x86))
+ testFloat64x8ConvertToInt64(t, archsimd.Float64x8.ConvertToInt64, map1[float64](floatToInt64_x86))
+
+ testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](floatToUint32_x86))
+ testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](floatToUint32_x86))
+ testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](floatToUint32_x86))
+ testFloat64x2ConvertToUint32(t, archsimd.Float64x2.ConvertToUint32, map1n[float64](floatToUint32_x86, 4))
+ testFloat64x4ConvertToUint32(t, archsimd.Float64x4.ConvertToUint32, map1[float64](floatToUint32_x86))
+ testFloat64x8ConvertToUint32(t, archsimd.Float64x8.ConvertToUint32, map1[float64](floatToUint32_x86))
+ testFloat32x4ConvertToUint64(t, archsimd.Float32x4.ConvertToUint64, map1[float32](floatToUint64_x86))
+ testFloat32x8ConvertToUint64(t, archsimd.Float32x8.ConvertToUint64, map1[float32](floatToUint64_x86))
+ testFloat64x2ConvertToUint64(t, archsimd.Float64x2.ConvertToUint64, map1[float64](floatToUint64_x86))
+ testFloat64x4ConvertToUint64(t, archsimd.Float64x4.ConvertToUint64, map1[float64](floatToUint64_x86))
+ testFloat64x8ConvertToUint64(t, archsimd.Float64x8.ConvertToUint64, map1[float64](floatToUint64_x86))
+
+ testInt32x16ConvertToFloat32(t, archsimd.Int32x16.ConvertToFloat32, map1[int32](toFloat32))
+ testInt64x2ConvertToFloat32(t, archsimd.Int64x2.ConvertToFloat32, map1n[int64](toFloat32, 4))
+ testInt64x4ConvertToFloat32(t, archsimd.Int64x4.ConvertToFloat32, map1[int64](toFloat32))
+ testInt64x8ConvertToFloat32(t, archsimd.Int64x8.ConvertToFloat32, map1[int64](toFloat32))
+ testInt64x2ConvertToFloat64(t, archsimd.Int64x2.ConvertToFloat64, map1[int64](toFloat64))
+ testInt64x4ConvertToFloat64(t, archsimd.Int64x4.ConvertToFloat64, map1[int64](toFloat64))
+ testInt64x8ConvertToFloat64(t, archsimd.Int64x8.ConvertToFloat64, map1[int64](toFloat64))
+
+ testUint32x4ConvertToFloat32(t, archsimd.Uint32x4.ConvertToFloat32, map1[uint32](toFloat32))
+ testUint32x8ConvertToFloat32(t, archsimd.Uint32x8.ConvertToFloat32, map1[uint32](toFloat32))
+ testUint32x16ConvertToFloat32(t, archsimd.Uint32x16.ConvertToFloat32, map1[uint32](toFloat32))
+ testUint64x2ConvertToFloat32(t, archsimd.Uint64x2.ConvertToFloat32, map1n[uint64](toFloat32, 4))
+ testUint64x4ConvertToFloat32(t, archsimd.Uint64x4.ConvertToFloat32, map1[uint64](toFloat32))
+ testUint64x8ConvertToFloat32(t, archsimd.Uint64x8.ConvertToFloat32, map1[uint64](toFloat32))
+ testUint32x4ConvertToFloat64(t, archsimd.Uint32x4.ConvertToFloat64, map1[uint32](toFloat64))
+ testUint32x8ConvertToFloat64(t, archsimd.Uint32x8.ConvertToFloat64, map1[uint32](toFloat64))
+ testUint64x2ConvertToFloat64(t, archsimd.Uint64x2.ConvertToFloat64, map1[uint64](toFloat64))
+ testUint64x4ConvertToFloat64(t, archsimd.Uint64x4.ConvertToFloat64, map1[uint64](toFloat64))
+ testUint64x8ConvertToFloat64(t, archsimd.Uint64x8.ConvertToFloat64, map1[uint64](toFloat64))
+ }
+}
+
+func TestExtend(t *testing.T) {
+ if archsimd.X86.AVX2() {
+ testInt8x16ConvertToInt16(t, archsimd.Int8x16.ExtendToInt16, map1[int8](toInt16))
+ testInt16x8ConvertToInt32(t, archsimd.Int16x8.ExtendToInt32, map1[int16](toInt32))
+ testInt32x4ConvertToInt64(t, archsimd.Int32x4.ExtendToInt64, map1[int32](toInt64))
+ testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
+ testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
+ testUint32x4ConvertToUint64(t, archsimd.Uint32x4.ExtendToUint64, map1[uint32](toUint64))
+ }
+
+ if archsimd.X86.AVX512() {
+ testInt8x32ConvertToInt16(t, archsimd.Int8x32.ExtendToInt16, map1[int8](toInt16))
+ testInt8x16ConvertToInt32(t, archsimd.Int8x16.ExtendToInt32, map1[int8](toInt32))
+ testInt16x16ConvertToInt32(t, archsimd.Int16x16.ExtendToInt32, map1[int16](toInt32))
+ testInt16x8ConvertToInt64(t, archsimd.Int16x8.ExtendToInt64, map1[int16](toInt64))
+ testInt32x8ConvertToInt64(t, archsimd.Int32x8.ExtendToInt64, map1[int32](toInt64))
+ testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
+ testUint8x16ConvertToUint32(t, archsimd.Uint8x16.ExtendToUint32, map1[uint8](toUint32))
+ testUint16x16ConvertToUint32(t, archsimd.Uint16x16.ExtendToUint32, map1[uint16](toUint32))
+ testUint16x8ConvertToUint64(t, archsimd.Uint16x8.ExtendToUint64, map1[uint16](toUint64))
+ testUint32x8ConvertToUint64(t, archsimd.Uint32x8.ExtendToUint64, map1[uint32](toUint64))
}
- testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](toUint32))
- testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](toUint32))
- testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](toUint32))
}
-func TestToInt32(t *testing.T) {
- testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](toInt32))
- testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](toInt32))
+func TestExtendLo(t *testing.T) {
+ testInt8x16ConvertLoToInt64x2(t, archsimd.Int8x16.ExtendLo2ToInt64, map1n[int8](toInt64, 2))
+ testInt16x8ConvertLoToInt64x2(t, archsimd.Int16x8.ExtendLo2ToInt64, map1n[int16](toInt64, 2))
+ testInt32x4ConvertLoToInt64x2(t, archsimd.Int32x4.ExtendLo2ToInt64, map1n[int32](toInt64, 2))
+ testUint8x16ConvertLoToUint64x2(t, archsimd.Uint8x16.ExtendLo2ToUint64, map1n[uint8](toUint64, 2))
+ testUint16x8ConvertLoToUint64x2(t, archsimd.Uint16x8.ExtendLo2ToUint64, map1n[uint16](toUint64, 2))
+ testUint32x4ConvertLoToUint64x2(t, archsimd.Uint32x4.ExtendLo2ToUint64, map1n[uint32](toUint64, 2))
+ testInt8x16ConvertLoToInt32x4(t, archsimd.Int8x16.ExtendLo4ToInt32, map1n[int8](toInt32, 4))
+ testInt16x8ConvertLoToInt32x4(t, archsimd.Int16x8.ExtendLo4ToInt32, map1n[int16](toInt32, 4))
+ testUint8x16ConvertLoToUint32x4(t, archsimd.Uint8x16.ExtendLo4ToUint32, map1n[uint8](toUint32, 4))
+ testUint16x8ConvertLoToUint32x4(t, archsimd.Uint16x8.ExtendLo4ToUint32, map1n[uint16](toUint32, 4))
+ testInt8x16ConvertLoToInt16x8(t, archsimd.Int8x16.ExtendLo8ToInt16, map1n[int8](toInt16, 8))
+ testUint8x16ConvertLoToUint16x8(t, archsimd.Uint8x16.ExtendLo8ToUint16, map1n[uint8](toUint16, 8))
+
+ if archsimd.X86.AVX2() {
+ testInt8x16ConvertLoToInt64x4(t, archsimd.Int8x16.ExtendLo4ToInt64, map1n[int8](toInt64, 4))
+ testInt16x8ConvertLoToInt64x4(t, archsimd.Int16x8.ExtendLo4ToInt64, map1n[int16](toInt64, 4))
+ testUint8x16ConvertLoToUint64x4(t, archsimd.Uint8x16.ExtendLo4ToUint64, map1n[uint8](toUint64, 4))
+ testUint16x8ConvertLoToUint64x4(t, archsimd.Uint16x8.ExtendLo4ToUint64, map1n[uint16](toUint64, 4))
+ testInt8x16ConvertLoToInt32x8(t, archsimd.Int8x16.ExtendLo8ToInt32, map1n[int8](toInt32, 8))
+ testUint8x16ConvertLoToUint32x8(t, archsimd.Uint8x16.ExtendLo8ToUint32, map1n[uint8](toUint32, 8))
+ }
+
+ if archsimd.X86.AVX512() {
+ testInt8x16ConvertToInt64(t, archsimd.Int8x16.ExtendLo8ToInt64, map1n[int8](toInt64, 8))
+ testUint8x16ConvertToUint64(t, archsimd.Uint8x16.ExtendLo8ToUint64, map1n[uint8](toUint64, 8))
+ }
}
-func TestConverts(t *testing.T) {
- testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16))
- testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32))
+func TestTruncate(t *testing.T) {
+ if archsimd.X86.AVX512() {
+ testInt16x8ConvertToInt8(t, archsimd.Int16x8.TruncateToInt8, map1n[int16](toInt8, 16))
+ testInt16x16ConvertToInt8(t, archsimd.Int16x16.TruncateToInt8, map1[int16](toInt8))
+ testInt16x32ConvertToInt8(t, archsimd.Int16x32.TruncateToInt8, map1[int16](toInt8))
+ testInt32x4ConvertToInt8(t, archsimd.Int32x4.TruncateToInt8, map1n[int32](toInt8, 16))
+ testInt32x8ConvertToInt8(t, archsimd.Int32x8.TruncateToInt8, map1n[int32](toInt8, 16))
+ testInt32x16ConvertToInt8(t, archsimd.Int32x16.TruncateToInt8, map1[int32](toInt8))
+ testInt64x2ConvertToInt8(t, archsimd.Int64x2.TruncateToInt8, map1n[int64](toInt8, 16))
+ testInt64x4ConvertToInt8(t, archsimd.Int64x4.TruncateToInt8, map1n[int64](toInt8, 16))
+ testInt64x8ConvertToInt8(t, archsimd.Int64x8.TruncateToInt8, map1n[int64](toInt8, 16))
+ testInt32x4ConvertToInt16(t, archsimd.Int32x4.TruncateToInt16, map1n[int32](toInt16, 8))
+ testInt32x8ConvertToInt16(t, archsimd.Int32x8.TruncateToInt16, map1[int32](toInt16))
+ testInt32x16ConvertToInt16(t, archsimd.Int32x16.TruncateToInt16, map1[int32](toInt16))
+ testInt64x2ConvertToInt16(t, archsimd.Int64x2.TruncateToInt16, map1n[int64](toInt16, 8))
+ testInt64x4ConvertToInt16(t, archsimd.Int64x4.TruncateToInt16, map1n[int64](toInt16, 8))
+ testInt64x8ConvertToInt16(t, archsimd.Int64x8.TruncateToInt16, map1[int64](toInt16))
+ testInt64x2ConvertToInt32(t, archsimd.Int64x2.TruncateToInt32, map1n[int64](toInt32, 4))
+ testInt64x4ConvertToInt32(t, archsimd.Int64x4.TruncateToInt32, map1[int64](toInt32))
+ testInt64x8ConvertToInt32(t, archsimd.Int64x8.TruncateToInt32, map1[int64](toInt32))
+
+ testUint16x8ConvertToUint8(t, archsimd.Uint16x8.TruncateToUint8, map1n[uint16](toUint8, 16))
+ testUint16x16ConvertToUint8(t, archsimd.Uint16x16.TruncateToUint8, map1[uint16](toUint8))
+ testUint16x32ConvertToUint8(t, archsimd.Uint16x32.TruncateToUint8, map1[uint16](toUint8))
+ testUint32x4ConvertToUint8(t, archsimd.Uint32x4.TruncateToUint8, map1n[uint32](toUint8, 16))
+ testUint32x8ConvertToUint8(t, archsimd.Uint32x8.TruncateToUint8, map1n[uint32](toUint8, 16))
+ testUint32x16ConvertToUint8(t, archsimd.Uint32x16.TruncateToUint8, map1[uint32](toUint8))
+ testUint64x2ConvertToUint8(t, archsimd.Uint64x2.TruncateToUint8, map1n[uint64](toUint8, 16))
+ testUint64x4ConvertToUint8(t, archsimd.Uint64x4.TruncateToUint8, map1n[uint64](toUint8, 16))
+ testUint64x8ConvertToUint8(t, archsimd.Uint64x8.TruncateToUint8, map1n[uint64](toUint8, 16))
+ testUint32x4ConvertToUint16(t, archsimd.Uint32x4.TruncateToUint16, map1n[uint32](toUint16, 8))
+ testUint32x8ConvertToUint16(t, archsimd.Uint32x8.TruncateToUint16, map1[uint32](toUint16))
+ testUint32x16ConvertToUint16(t, archsimd.Uint32x16.TruncateToUint16, map1[uint32](toUint16))
+ testUint64x2ConvertToUint16(t, archsimd.Uint64x2.TruncateToUint16, map1n[uint64](toUint16, 8))
+ testUint64x4ConvertToUint16(t, archsimd.Uint64x4.TruncateToUint16, map1n[uint64](toUint16, 8))
+ testUint64x8ConvertToUint16(t, archsimd.Uint64x8.TruncateToUint16, map1[uint64](toUint16))
+ testUint64x2ConvertToUint32(t, archsimd.Uint64x2.TruncateToUint32, map1n[uint64](toUint32, 4))
+ testUint64x4ConvertToUint32(t, archsimd.Uint64x4.TruncateToUint32, map1[uint64](toUint32))
+ testUint64x8ConvertToUint32(t, archsimd.Uint64x8.TruncateToUint32, map1[uint64](toUint32))
+ }
}
-func TestConvertsAVX512(t *testing.T) {
- if !archsimd.X86.AVX512() {
- t.Skip("Needs AVX512")
+func TestSaturate(t *testing.T) {
+ if archsimd.X86.AVX512() {
+ testInt16x8ConvertToInt8(t, archsimd.Int16x8.SaturateToInt8, map1n[int16](satToInt8, 16))
+ testInt16x16ConvertToInt8(t, archsimd.Int16x16.SaturateToInt8, map1[int16](satToInt8))
+ testInt16x32ConvertToInt8(t, archsimd.Int16x32.SaturateToInt8, map1[int16](satToInt8))
+ testInt32x4ConvertToInt8(t, archsimd.Int32x4.SaturateToInt8, map1n[int32](satToInt8, 16))
+ testInt32x8ConvertToInt8(t, archsimd.Int32x8.SaturateToInt8, map1n[int32](satToInt8, 16))
+ testInt32x16ConvertToInt8(t, archsimd.Int32x16.SaturateToInt8, map1[int32](satToInt8))
+ testInt64x2ConvertToInt8(t, archsimd.Int64x2.SaturateToInt8, map1n[int64](satToInt8, 16))
+ testInt64x4ConvertToInt8(t, archsimd.Int64x4.SaturateToInt8, map1n[int64](satToInt8, 16))
+ testInt64x8ConvertToInt8(t, archsimd.Int64x8.SaturateToInt8, map1n[int64](satToInt8, 16))
+ testInt32x4ConvertToInt16(t, archsimd.Int32x4.SaturateToInt16, map1n[int32](satToInt16, 8))
+ testInt32x8ConvertToInt16(t, archsimd.Int32x8.SaturateToInt16, map1[int32](satToInt16))
+ testInt32x16ConvertToInt16(t, archsimd.Int32x16.SaturateToInt16, map1[int32](satToInt16))
+ testInt64x2ConvertToInt16(t, archsimd.Int64x2.SaturateToInt16, map1n[int64](satToInt16, 8))
+ testInt64x4ConvertToInt16(t, archsimd.Int64x4.SaturateToInt16, map1n[int64](satToInt16, 8))
+ testInt64x8ConvertToInt16(t, archsimd.Int64x8.SaturateToInt16, map1[int64](satToInt16))
+ testInt64x2ConvertToInt32(t, archsimd.Int64x2.SaturateToInt32, map1n[int64](satToInt32, 4))
+ testInt64x4ConvertToInt32(t, archsimd.Int64x4.SaturateToInt32, map1[int64](satToInt32))
+ testInt64x8ConvertToInt32(t, archsimd.Int64x8.SaturateToInt32, map1[int64](satToInt32))
+
+ testUint16x8ConvertToUint8(t, archsimd.Uint16x8.SaturateToUint8, map1n[uint16](satToUint8, 16))
+ testUint16x16ConvertToUint8(t, archsimd.Uint16x16.SaturateToUint8, map1[uint16](satToUint8))
+ testUint16x32ConvertToUint8(t, archsimd.Uint16x32.SaturateToUint8, map1[uint16](satToUint8))
+ testUint32x4ConvertToUint8(t, archsimd.Uint32x4.SaturateToUint8, map1n[uint32](satToUint8, 16))
+ testUint32x8ConvertToUint8(t, archsimd.Uint32x8.SaturateToUint8, map1n[uint32](satToUint8, 16))
+ testUint32x16ConvertToUint8(t, archsimd.Uint32x16.SaturateToUint8, map1[uint32](satToUint8))
+ testUint64x2ConvertToUint8(t, archsimd.Uint64x2.SaturateToUint8, map1n[uint64](satToUint8, 16))
+ testUint64x4ConvertToUint8(t, archsimd.Uint64x4.SaturateToUint8, map1n[uint64](satToUint8, 16))
+ testUint64x8ConvertToUint8(t, archsimd.Uint64x8.SaturateToUint8, map1n[uint64](satToUint8, 16))
+ testUint32x4ConvertToUint16(t, archsimd.Uint32x4.SaturateToUint16, map1n[uint32](satToUint16, 8))
+ testUint32x8ConvertToUint16(t, archsimd.Uint32x8.SaturateToUint16, map1[uint32](satToUint16))
+ testUint32x16ConvertToUint16(t, archsimd.Uint32x16.SaturateToUint16, map1[uint32](satToUint16))
+ testUint64x2ConvertToUint16(t, archsimd.Uint64x2.SaturateToUint16, map1n[uint64](satToUint16, 8))
+ testUint64x4ConvertToUint16(t, archsimd.Uint64x4.SaturateToUint16, map1n[uint64](satToUint16, 8))
+ testUint64x8ConvertToUint16(t, archsimd.Uint64x8.SaturateToUint16, map1[uint64](satToUint16))
+ testUint64x2ConvertToUint32(t, archsimd.Uint64x2.SaturateToUint32, map1n[uint64](satToUint32, 4))
+ testUint64x4ConvertToUint32(t, archsimd.Uint64x4.SaturateToUint32, map1[uint64](satToUint32))
+ testUint64x8ConvertToUint32(t, archsimd.Uint64x8.SaturateToUint32, map1[uint64](satToUint32))
}
- testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16))
}
diff --git a/src/simd/archsimd/maskmerge_gen_amd64.go b/src/simd/archsimd/maskmerge_gen_amd64.go
index 5e9ea394b3..ad56521714 100644
--- a/src/simd/archsimd/maskmerge_gen_amd64.go
+++ b/src/simd/archsimd/maskmerge_gen_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -286,7 +286,7 @@ func (x Int8x64) Masked(mask Mask8x64) Int8x64 {
return im.And(x)
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 {
return y.blendMasked(x, mask)
}
@@ -297,7 +297,7 @@ func (x Int16x32) Masked(mask Mask16x32) Int16x32 {
return im.And(x)
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 {
return y.blendMasked(x, mask)
}
@@ -308,7 +308,7 @@ func (x Int32x16) Masked(mask Mask32x16) Int32x16 {
return im.And(x)
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 {
return y.blendMasked(x, mask)
}
@@ -319,7 +319,7 @@ func (x Int64x8) Masked(mask Mask64x8) Int64x8 {
return im.And(x)
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 {
return y.blendMasked(x, mask)
}
@@ -330,7 +330,7 @@ func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 {
return x.AsInt8x64().And(im).AsUint8x64()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 {
ix := x.AsInt8x64()
iy := y.AsInt8x64()
@@ -343,7 +343,7 @@ func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 {
return x.AsInt16x32().And(im).AsUint16x32()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 {
ix := x.AsInt16x32()
iy := y.AsInt16x32()
@@ -356,7 +356,7 @@ func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 {
return x.AsInt32x16().And(im).AsUint32x16()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 {
ix := x.AsInt32x16()
iy := y.AsInt32x16()
@@ -369,7 +369,7 @@ func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 {
return x.AsInt64x8().And(im).AsUint64x8()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 {
ix := x.AsInt64x8()
iy := y.AsInt64x8()
@@ -382,7 +382,7 @@ func (x Float32x16) Masked(mask Mask32x16) Float32x16 {
return x.AsInt32x16().And(im).AsFloat32x16()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 {
ix := x.AsInt32x16()
iy := y.AsInt32x16()
@@ -395,7 +395,7 @@ func (x Float64x8) Masked(mask Mask64x8) Float64x8 {
return x.AsInt64x8().And(im).AsFloat64x8()
}
-// Merge returns x but with elements set to y where m is false.
+// Merge returns x but with elements set to y where mask is false.
func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
ix := x.AsInt64x8()
iy := y.AsInt64x8()
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index acd5719e6e..eba340c793 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -349,90 +349,101 @@ func (x Uint64x8) Add(y Uint64x8) Uint64x8
/* AddPairs */
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VHADDPS, CPU Feature: AVX
func (x Float32x4) AddPairs(y Float32x4) Float32x4
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x8) AddPairs(y Float32x8) Float32x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1].
//
// Asm: VHADDPD, CPU Feature: AVX
func (x Float64x2) AddPairs(y Float64x2) Float64x2
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x4) AddPairs(y Float64x4) Float64x4
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) AddPairs(y Int16x8) Int16x8
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Int16x8) AddPairs(y Int16x8) Int16x8
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) AddPairs(y Int32x4) Int32x4
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Int16x16) AddPairs(y Int16x16) Int16x16
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX
-func (x Int32x4) AddPairs(y Int32x4) Int32x4
+func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+/* AddPairsGrouped */
+
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Int32x8) AddPairs(y Int32x8) Int32x8
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) AddPairsGrouped(y Float32x8) Float32x8
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1].
//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) AddPairsGrouped(y Float64x4) Float64x4
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDW, CPU Feature: AVX2
-func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
+func (x Int16x16) AddPairsGrouped(y Int16x16) Int16x16
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) AddPairsGrouped(y Int32x8) Int32x8
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
+//
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) AddPairsGrouped(y Uint16x16) Uint16x16
+
+// AddPairsGrouped horizontally adds adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDD, CPU Feature: AVX2
-func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
+func (x Uint32x8) AddPairsGrouped(y Uint32x8) Uint32x8
/* AddPairsSaturated */
// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX
func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
-// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
+/* AddPairsSaturatedGrouped */
+
+// AddPairsSaturatedGrouped horizontally adds adjacent pairs of elements with saturation.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...].
//
// Asm: VPHADDSW, CPU Feature: AVX2
-func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
+func (x Int16x16) AddPairsSaturatedGrouped(y Int16x16) Int16x16
/* AddSaturated */
@@ -1275,7 +1286,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
/* ConcatPermute */
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1283,7 +1296,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8
func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1291,7 +1306,9 @@ func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16
func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1299,7 +1316,9 @@ func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16
func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1307,7 +1326,9 @@ func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32
func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1315,7 +1336,9 @@ func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32
func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1323,7 +1346,9 @@ func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64
func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1331,7 +1356,9 @@ func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64
func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1339,7 +1366,9 @@ func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8
func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1347,7 +1376,9 @@ func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8
func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1355,7 +1386,9 @@ func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16
func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1363,7 +1396,9 @@ func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16
func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1371,7 +1406,9 @@ func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32
func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1379,7 +1416,9 @@ func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32
func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1387,7 +1426,9 @@ func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4
func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1395,7 +1436,9 @@ func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4
func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1403,7 +1446,9 @@ func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4
func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1411,7 +1456,9 @@ func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8
func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1419,7 +1466,9 @@ func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8
func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1427,7 +1476,9 @@ func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8
func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1435,7 +1486,9 @@ func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16
func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1443,7 +1496,9 @@ func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16
func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1451,7 +1506,9 @@ func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16
func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1459,7 +1516,9 @@ func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2
func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1467,7 +1526,9 @@ func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2
func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1475,7 +1536,9 @@ func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2
func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1483,7 +1546,9 @@ func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4
func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1491,7 +1556,9 @@ func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4
func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1499,7 +1566,9 @@ func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4
func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1507,7 +1576,9 @@ func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8
func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8
// ConcatPermute performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
+// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+//
// where xy is the concatenation of x (lower half) and y (upper half).
// Only the needed bits to represent xy's index are used in indices' elements.
//
@@ -1516,33 +1587,33 @@ func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8
/* ConcatShiftBytesRight */
-// ConcatShiftBytesRight concatenates x and y and shift it right by constant bytes.
+// ConcatShiftBytesRight concatenates x and y and shift it right by shift bytes.
// The result vector will be the lower half of the concatenated vector.
//
-// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX
-func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16
+func (x Uint8x16) ConcatShiftBytesRight(shift uint8, y Uint8x16) Uint8x16
/* ConcatShiftBytesRightGrouped */
-// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes.
+// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by shift bytes.
// The result vector will be the lower half of the concatenated vector.
// This operation is performed grouped by each 16 byte.
//
-// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX2
-func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32
+func (x Uint8x32) ConcatShiftBytesRightGrouped(shift uint8, y Uint8x32) Uint8x32
-// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes.
+// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by shift bytes.
// The result vector will be the lower half of the concatenated vector.
// This operation is performed grouped by each 16 byte.
//
-// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPALIGNR, CPU Feature: AVX512
-func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64
+func (x Uint8x64) ConcatShiftBytesRightGrouped(shift uint8, y Uint8x64) Uint8x64
/* ConvertToFloat32 */
@@ -1872,38 +1943,38 @@ func (x Float64x8) ConvertToUint64() Uint64x8
/* CopySign */
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGNB, CPU Feature: AVX
func (x Int8x16) CopySign(y Int8x16) Int8x16
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGNB, CPU Feature: AVX2
func (x Int8x32) CopySign(y Int8x32) Int8x32
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGNW, CPU Feature: AVX
func (x Int16x8) CopySign(y Int16x8) Int16x8
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGNW, CPU Feature: AVX2
func (x Int16x16) CopySign(y Int16x16) Int16x16
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGND, CPU Feature: AVX
func (x Int32x4) CopySign(y Int32x4) Int32x4
-// CopySign returns the product of the first operand with -1, 0, or 1,
-// whichever constant is nearest to the value of the second operand.
+// CopySign returns the product of x with -1, 0, or 1,
+// whichever constant is nearest to the value of y.
//
// Asm: VPSIGND, CPU Feature: AVX2
func (x Int32x8) CopySign(y Int32x8) Int32x8
@@ -1980,194 +2051,154 @@ func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16
// Asm: VPMADDUBSW, CPU Feature: AVX512
func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
-/* DotProductQuadruple */
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8
-
-// DotProductQuadruple performs dot products on groups of 4 elements of x and y.
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16
-
-/* DotProductQuadrupleSaturated */
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8
-
-// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y.
-// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction.
-//
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
-
/* Equal */
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Int8x16) Equal(y Int8x16) Mask8x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Int8x32) Equal(y Int8x32) Mask8x32
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Int8x64) Equal(y Int8x64) Mask8x64
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Int16x8) Equal(y Int16x8) Mask16x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Int16x16) Equal(y Int16x16) Mask16x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Int16x32) Equal(y Int16x32) Mask16x32
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Int32x4) Equal(y Int32x4) Mask32x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Int32x8) Equal(y Int32x8) Mask32x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Int32x16) Equal(y Int32x16) Mask32x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Int64x2) Equal(y Int64x2) Mask64x2
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Int64x4) Equal(y Int64x4) Mask64x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Int64x8) Equal(y Int64x8) Mask64x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Uint8x16) Equal(y Uint8x16) Mask8x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Uint8x32) Equal(y Uint8x32) Mask8x32
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Uint8x64) Equal(y Uint8x64) Mask8x64
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Uint16x8) Equal(y Uint16x8) Mask16x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Uint16x16) Equal(y Uint16x16) Mask16x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Uint16x32) Equal(y Uint16x32) Mask16x32
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Uint32x4) Equal(y Uint32x4) Mask32x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Uint32x8) Equal(y Uint32x8) Mask32x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Uint32x16) Equal(y Uint32x16) Mask32x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Uint64x2) Equal(y Uint64x2) Mask64x2
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Uint64x4) Equal(y Uint64x4) Mask64x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Uint64x8) Equal(y Uint64x8) Mask64x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Equal(y Float32x4) Mask32x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Equal(y Float32x8) Mask32x8
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Equal(y Float32x16) Mask32x16
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Equal(y Float64x2) Mask64x2
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Equal(y Float64x4) Mask64x4
-// Equal returns x equals y, elementwise.
+// Equal returns a mask whose elements indicate whether x == y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Equal(y Float64x8) Mask64x8
@@ -2354,254 +2385,218 @@ func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
-/* ExtendLo2ToInt64x2 */
+/* ExtendLo2ToInt64 */
-// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX
-func (x Int8x16) ExtendLo2ToInt64x2() Int64x2
+func (x Int8x16) ExtendLo2ToInt64() Int64x2
-// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX
-func (x Int16x8) ExtendLo2ToInt64x2() Int64x2
+func (x Int16x8) ExtendLo2ToInt64() Int64x2
-// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX
-func (x Int32x4) ExtendLo2ToInt64x2() Int64x2
+func (x Int32x4) ExtendLo2ToInt64() Int64x2
-/* ExtendLo2ToUint64x2 */
+/* ExtendLo2ToUint64 */
-// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX
-func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint8x16) ExtendLo2ToUint64() Uint64x2
-// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX
-func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint16x8) ExtendLo2ToUint64() Uint64x2
-// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX
-func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint32x4) ExtendLo2ToUint64() Uint64x2
-/* ExtendLo4ToInt32x4 */
+/* ExtendLo4ToInt32 */
-// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX
-func (x Int8x16) ExtendLo4ToInt32x4() Int32x4
+func (x Int8x16) ExtendLo4ToInt32() Int32x4
-// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX
-func (x Int16x8) ExtendLo4ToInt32x4() Int32x4
+func (x Int16x8) ExtendLo4ToInt32() Int32x4
-/* ExtendLo4ToInt64x4 */
+/* ExtendLo4ToInt64 */
-// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX2
-func (x Int8x16) ExtendLo4ToInt64x4() Int64x4
+func (x Int8x16) ExtendLo4ToInt64() Int64x4
-// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX2
-func (x Int16x8) ExtendLo4ToInt64x4() Int64x4
+func (x Int16x8) ExtendLo4ToInt64() Int64x4
-/* ExtendLo4ToUint32x4 */
+/* ExtendLo4ToUint32 */
-// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX
-func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4
+func (x Uint8x16) ExtendLo4ToUint32() Uint32x4
-// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX
-func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4
+func (x Uint16x8) ExtendLo4ToUint32() Uint32x4
-/* ExtendLo4ToUint64x4 */
+/* ExtendLo4ToUint64 */
-// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX2
-func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4
+func (x Uint8x16) ExtendLo4ToUint64() Uint64x4
-// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX2
-func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4
+func (x Uint16x8) ExtendLo4ToUint64() Uint64x4
-/* ExtendLo8ToInt16x8 */
+/* ExtendLo8ToInt16 */
-// ExtendLo8ToInt16x8 converts 8 lowest vector element values to int16.
-// The result vector's elements are sign-extended.
+// ExtendLo8ToInt16 sign-extends 8 lowest vector element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX
-func (x Int8x16) ExtendLo8ToInt16x8() Int16x8
+func (x Int8x16) ExtendLo8ToInt16() Int16x8
-/* ExtendLo8ToInt32x8 */
+/* ExtendLo8ToInt32 */
-// ExtendLo8ToInt32x8 converts 8 lowest vector element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendLo8ToInt32 sign-extends 8 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX2
-func (x Int8x16) ExtendLo8ToInt32x8() Int32x8
+func (x Int8x16) ExtendLo8ToInt32() Int32x8
-/* ExtendLo8ToInt64x8 */
+/* ExtendLo8ToInt64 */
-// ExtendLo8ToInt64x8 converts 8 lowest vector element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendLo8ToInt64 sign-extends 8 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX512
-func (x Int8x16) ExtendLo8ToInt64x8() Int64x8
+func (x Int8x16) ExtendLo8ToInt64() Int64x8
-/* ExtendLo8ToUint16x8 */
+/* ExtendLo8ToUint16 */
-// ExtendLo8ToUint16x8 converts 8 lowest vector element values to uint16.
-// The result vector's elements are zero-extended.
+// ExtendLo8ToUint16 zero-extends 8 lowest vector element values to uint16.
//
// Asm: VPMOVZXBW, CPU Feature: AVX
-func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8
+func (x Uint8x16) ExtendLo8ToUint16() Uint16x8
-/* ExtendLo8ToUint32x8 */
+/* ExtendLo8ToUint32 */
-// ExtendLo8ToUint32x8 converts 8 lowest vector element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendLo8ToUint32 zero-extends 8 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX2
-func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8
+func (x Uint8x16) ExtendLo8ToUint32() Uint32x8
-/* ExtendLo8ToUint64x8 */
+/* ExtendLo8ToUint64 */
-// ExtendLo8ToUint64x8 converts 8 lowest vector element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendLo8ToUint64 zero-extends 8 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX512
-func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8
+func (x Uint8x16) ExtendLo8ToUint64() Uint64x8
/* ExtendToInt16 */
-// ExtendToInt16 converts element values to int16.
-// The result vector's elements are sign-extended.
+// ExtendToInt16 sign-extends element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX2
func (x Int8x16) ExtendToInt16() Int16x16
-// ExtendToInt16 converts element values to int16.
-// The result vector's elements are sign-extended.
+// ExtendToInt16 sign-extends element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX512
func (x Int8x32) ExtendToInt16() Int16x32
/* ExtendToInt32 */
-// ExtendToInt32 converts element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendToInt32 sign-extends element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX512
func (x Int8x16) ExtendToInt32() Int32x16
-// ExtendToInt32 converts element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendToInt32 sign-extends element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX2
func (x Int16x8) ExtendToInt32() Int32x8
-// ExtendToInt32 converts element values to int32.
-// The result vector's elements are sign-extended.
+// ExtendToInt32 sign-extends element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX512
func (x Int16x16) ExtendToInt32() Int32x16
/* ExtendToInt64 */
-// ExtendToInt64 converts element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendToInt64 sign-extends element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX512
func (x Int16x8) ExtendToInt64() Int64x8
-// ExtendToInt64 converts element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendToInt64 sign-extends element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX2
func (x Int32x4) ExtendToInt64() Int64x4
-// ExtendToInt64 converts element values to int64.
-// The result vector's elements are sign-extended.
+// ExtendToInt64 sign-extends element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX512
func (x Int32x8) ExtendToInt64() Int64x8
/* ExtendToUint16 */
-// ExtendToUint16 converts element values to uint16.
-// The result vector's elements are zero-extended.
+// ExtendToUint16 zero-extends element values to uint16.
//
// Asm: VPMOVZXBW, CPU Feature: AVX2
func (x Uint8x16) ExtendToUint16() Uint16x16
-// ExtendToUint16 converts element values to uint16.
-// The result vector's elements are zero-extended.
+// ExtendToUint16 zero-extends element values to uint16.
//
// Asm: VPMOVZXBW, CPU Feature: AVX512
func (x Uint8x32) ExtendToUint16() Uint16x32
/* ExtendToUint32 */
-// ExtendToUint32 converts element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendToUint32 zero-extends element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX512
func (x Uint8x16) ExtendToUint32() Uint32x16
-// ExtendToUint32 converts element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendToUint32 zero-extends element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX2
func (x Uint16x8) ExtendToUint32() Uint32x8
-// ExtendToUint32 converts element values to uint32.
-// The result vector's elements are zero-extended.
+// ExtendToUint32 zero-extends element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX512
func (x Uint16x16) ExtendToUint32() Uint32x16
/* ExtendToUint64 */
-// ExtendToUint64 converts element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendToUint64 zero-extends element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX512
func (x Uint16x8) ExtendToUint64() Uint64x8
-// ExtendToUint64 converts element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendToUint64 zero-extends element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX2
func (x Uint32x4) ExtendToUint64() Uint64x4
-// ExtendToUint64 converts element values to uint64.
-// The result vector's elements are zero-extended.
+// ExtendToUint64 zero-extends element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX512
func (x Uint32x8) ExtendToUint64() Uint64x8
@@ -3081,184 +3076,184 @@ func (x Uint64x8) GetLo() Uint64x4
/* Greater */
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTB, CPU Feature: AVX
func (x Int8x16) Greater(y Int8x16) Mask8x16
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTB, CPU Feature: AVX2
func (x Int8x32) Greater(y Int8x32) Mask8x32
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTB, CPU Feature: AVX512
func (x Int8x64) Greater(y Int8x64) Mask8x64
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTW, CPU Feature: AVX
func (x Int16x8) Greater(y Int16x8) Mask16x8
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTW, CPU Feature: AVX2
func (x Int16x16) Greater(y Int16x16) Mask16x16
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTW, CPU Feature: AVX512
func (x Int16x32) Greater(y Int16x32) Mask16x32
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTD, CPU Feature: AVX
func (x Int32x4) Greater(y Int32x4) Mask32x4
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTD, CPU Feature: AVX2
func (x Int32x8) Greater(y Int32x8) Mask32x8
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTD, CPU Feature: AVX512
func (x Int32x16) Greater(y Int32x16) Mask32x16
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
func (x Int64x2) Greater(y Int64x2) Mask64x2
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTQ, CPU Feature: AVX2
func (x Int64x4) Greater(y Int64x4) Mask64x4
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPGTQ, CPU Feature: AVX512
func (x Int64x8) Greater(y Int64x8) Mask64x8
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Greater(y Float32x4) Mask32x4
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Greater(y Float32x8) Mask32x8
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Greater(y Float32x16) Mask32x16
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Greater(y Float64x2) Mask64x2
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Greater(y Float64x4) Mask64x4
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Greater(y Float64x8) Mask64x8
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Greater(y Uint8x64) Mask8x64
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Greater(y Uint16x32) Mask16x32
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Greater(y Uint32x16) Mask32x16
-// Greater returns x greater-than y, elementwise.
+// Greater returns a mask whose elements indicate whether x > y.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Greater(y Uint64x8) Mask64x8
/* GreaterEqual */
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
-// GreaterEqual returns x greater-than-or-equals y, elementwise.
+// GreaterEqual returns a mask whose elements indicate whether x >= y.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
@@ -3451,38 +3446,6 @@ func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
-/* IsNan */
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
-
/* LeadingZeros */
// LeadingZeros counts the leading zeros of each element in x.
@@ -3547,448 +3510,448 @@ func (x Uint64x8) LeadingZeros() Uint64x8
/* Less */
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Less(y Float32x4) Mask32x4
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Less(y Float32x8) Mask32x8
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Less(y Float32x16) Mask32x16
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Less(y Float64x2) Mask64x2
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Less(y Float64x4) Mask64x4
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Less(y Float64x8) Mask64x8
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) Less(y Int8x64) Mask8x64
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) Less(y Int16x32) Mask16x32
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) Less(y Int32x16) Mask32x16
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) Less(y Int64x8) Mask64x8
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Less(y Uint8x64) Mask8x64
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Less(y Uint16x32) Mask16x32
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Less(y Uint32x16) Mask32x16
-// Less returns x less-than y, elementwise.
+// Less returns a mask whose elements indicate whether x < y.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Less(y Uint64x8) Mask64x8
/* LessEqual */
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) LessEqual(y Float32x4) Mask32x4
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) LessEqual(y Float32x8) Mask32x8
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) LessEqual(y Float32x16) Mask32x16
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) LessEqual(y Float64x2) Mask64x2
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) LessEqual(y Float64x4) Mask64x4
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) LessEqual(y Float64x8) Mask64x8
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) LessEqual(y Int8x64) Mask8x64
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) LessEqual(y Int16x32) Mask16x32
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) LessEqual(y Int32x16) Mask32x16
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) LessEqual(y Int64x8) Mask64x8
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
-// LessEqual returns x less-than-or-equals y, elementwise.
+// LessEqual returns a mask whose elements indicate whether x <= y.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
/* Max */
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x4) Max(y Float32x4) Float32x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPS, CPU Feature: AVX
func (x Float32x8) Max(y Float32x8) Float32x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPS, CPU Feature: AVX512
func (x Float32x16) Max(y Float32x16) Float32x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x2) Max(y Float64x2) Float64x2
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPD, CPU Feature: AVX
func (x Float64x4) Max(y Float64x4) Float64x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VMAXPD, CPU Feature: AVX512
func (x Float64x8) Max(y Float64x8) Float64x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSB, CPU Feature: AVX
func (x Int8x16) Max(y Int8x16) Int8x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSB, CPU Feature: AVX2
func (x Int8x32) Max(y Int8x32) Int8x32
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSB, CPU Feature: AVX512
func (x Int8x64) Max(y Int8x64) Int8x64
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSW, CPU Feature: AVX
func (x Int16x8) Max(y Int16x8) Int16x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSW, CPU Feature: AVX2
func (x Int16x16) Max(y Int16x16) Int16x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSW, CPU Feature: AVX512
func (x Int16x32) Max(y Int16x32) Int16x32
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSD, CPU Feature: AVX
func (x Int32x4) Max(y Int32x4) Int32x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSD, CPU Feature: AVX2
func (x Int32x8) Max(y Int32x8) Int32x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSD, CPU Feature: AVX512
func (x Int32x16) Max(y Int32x16) Int32x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x2) Max(y Int64x2) Int64x2
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x4) Max(y Int64x4) Int64x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXSQ, CPU Feature: AVX512
func (x Int64x8) Max(y Int64x8) Int64x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUB, CPU Feature: AVX
func (x Uint8x16) Max(y Uint8x16) Uint8x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUB, CPU Feature: AVX2
func (x Uint8x32) Max(y Uint8x32) Uint8x32
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUB, CPU Feature: AVX512
func (x Uint8x64) Max(y Uint8x64) Uint8x64
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUW, CPU Feature: AVX
func (x Uint16x8) Max(y Uint16x8) Uint16x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUW, CPU Feature: AVX2
func (x Uint16x16) Max(y Uint16x16) Uint16x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUW, CPU Feature: AVX512
func (x Uint16x32) Max(y Uint16x32) Uint16x32
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUD, CPU Feature: AVX
func (x Uint32x4) Max(y Uint32x4) Uint32x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUD, CPU Feature: AVX2
func (x Uint32x8) Max(y Uint32x8) Uint32x8
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUD, CPU Feature: AVX512
func (x Uint32x16) Max(y Uint32x16) Uint32x16
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x2) Max(y Uint64x2) Uint64x2
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x4) Max(y Uint64x4) Uint64x4
-// Max computes the maximum of corresponding elements.
+// Max computes the maximum of each pair of corresponding elements in x and y.
//
// Asm: VPMAXUQ, CPU Feature: AVX512
func (x Uint64x8) Max(y Uint64x8) Uint64x8
/* Min */
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x4) Min(y Float32x4) Float32x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPS, CPU Feature: AVX
func (x Float32x8) Min(y Float32x8) Float32x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPS, CPU Feature: AVX512
func (x Float32x16) Min(y Float32x16) Float32x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x2) Min(y Float64x2) Float64x2
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPD, CPU Feature: AVX
func (x Float64x4) Min(y Float64x4) Float64x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VMINPD, CPU Feature: AVX512
func (x Float64x8) Min(y Float64x8) Float64x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSB, CPU Feature: AVX
func (x Int8x16) Min(y Int8x16) Int8x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSB, CPU Feature: AVX2
func (x Int8x32) Min(y Int8x32) Int8x32
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSB, CPU Feature: AVX512
func (x Int8x64) Min(y Int8x64) Int8x64
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSW, CPU Feature: AVX
func (x Int16x8) Min(y Int16x8) Int16x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSW, CPU Feature: AVX2
func (x Int16x16) Min(y Int16x16) Int16x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSW, CPU Feature: AVX512
func (x Int16x32) Min(y Int16x32) Int16x32
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSD, CPU Feature: AVX
func (x Int32x4) Min(y Int32x4) Int32x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSD, CPU Feature: AVX2
func (x Int32x8) Min(y Int32x8) Int32x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSD, CPU Feature: AVX512
func (x Int32x16) Min(y Int32x16) Int32x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x2) Min(y Int64x2) Int64x2
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x4) Min(y Int64x4) Int64x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINSQ, CPU Feature: AVX512
func (x Int64x8) Min(y Int64x8) Int64x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUB, CPU Feature: AVX
func (x Uint8x16) Min(y Uint8x16) Uint8x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUB, CPU Feature: AVX2
func (x Uint8x32) Min(y Uint8x32) Uint8x32
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUB, CPU Feature: AVX512
func (x Uint8x64) Min(y Uint8x64) Uint8x64
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUW, CPU Feature: AVX
func (x Uint16x8) Min(y Uint16x8) Uint16x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUW, CPU Feature: AVX2
func (x Uint16x16) Min(y Uint16x16) Uint16x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUW, CPU Feature: AVX512
func (x Uint16x32) Min(y Uint16x32) Uint16x32
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUD, CPU Feature: AVX
func (x Uint32x4) Min(y Uint32x4) Uint32x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUD, CPU Feature: AVX2
func (x Uint32x8) Min(y Uint32x8) Uint32x8
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUD, CPU Feature: AVX512
func (x Uint32x16) Min(y Uint32x16) Uint32x16
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x2) Min(y Uint64x2) Uint64x2
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x4) Min(y Uint64x4) Uint64x4
-// Min computes the minimum of corresponding elements.
+// Min computes the minimum of each pair of corresponding elements in x and y.
//
// Asm: VPMINUQ, CPU Feature: AVX512
func (x Uint64x8) Min(y Uint64x8) Uint64x8
@@ -4182,25 +4145,25 @@ func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
/* MulEvenWiden */
// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Result[i] = v1[2*i] * v2[2*i].
//
// Asm: VPMULDQ, CPU Feature: AVX
func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Result[i] = v1[2*i] * v2[2*i].
//
// Asm: VPMULDQ, CPU Feature: AVX2
func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Result[i] = v1[2*i] * v2[2*i].
//
// Asm: VPMULUDQ, CPU Feature: AVX
func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
+// Result[i] = v1[2*i] * v2[2*i].
//
// Asm: VPMULUDQ, CPU Feature: AVX2
func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
@@ -4271,72 +4234,72 @@ func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
/* NotEqual */
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) NotEqual(y Float32x4) Mask32x4
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) NotEqual(y Float32x8) Mask32x8
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) NotEqual(y Float32x16) Mask32x16
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) NotEqual(y Float64x2) Mask64x2
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) NotEqual(y Float64x4) Mask64x4
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) NotEqual(y Float64x8) Mask64x8
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) NotEqual(y Int8x64) Mask8x64
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) NotEqual(y Int16x32) Mask16x32
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) NotEqual(y Int32x16) Mask32x16
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) NotEqual(y Int64x8) Mask64x8
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
-// NotEqual returns x not-equals y, elementwise.
+// NotEqual returns a mask whose elements indicate whether x != y.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
@@ -4588,169 +4551,217 @@ func (x Uint64x8) Or(y Uint64x8) Uint64x8
/* Permute */
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x16) Permute(indices Uint8x16) Int8x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 5 bits (values 0-31) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 5 bits (values 0-31) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x32) Permute(indices Uint8x32) Int8x32
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 5 bits (values 0-31) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 5 bits (values 0-31) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 6 bits (values 0-63) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 6 bits (values 0-63) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Int8x64) Permute(indices Uint8x64) Int8x64
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 6 bits (values 0-63) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 6 bits (values 0-63) of each element of indices is used.
//
// Asm: VPERMB, CPU Feature: AVX512VBMI
func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x8) Permute(indices Uint16x8) Int16x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x16) Permute(indices Uint16x16) Int16x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 5 bits (values 0-31) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 5 bits (values 0-31) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Int16x32) Permute(indices Uint16x32) Int16x32
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 5 bits (values 0-31) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 5 bits (values 0-31) of each element of indices is used.
//
// Asm: VPERMW, CPU Feature: AVX512
func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMPS, CPU Feature: AVX2
func (x Float32x8) Permute(indices Uint32x8) Float32x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Int32x8) Permute(indices Uint32x8) Int32x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMD, CPU Feature: AVX2
func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMPS, CPU Feature: AVX512
func (x Float32x16) Permute(indices Uint32x16) Float32x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Int32x16) Permute(indices Uint32x16) Int32x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 4 bits (values 0-15) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 4 bits (values 0-15) of each element of indices is used.
//
// Asm: VPERMD, CPU Feature: AVX512
func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 2 bits (values 0-3) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 2 bits (values 0-3) of each element of indices is used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x4) Permute(indices Uint64x4) Float64x4
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 2 bits (values 0-3) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 2 bits (values 0-3) of each element of indices is used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x4) Permute(indices Uint64x4) Int64x4
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 2 bits (values 0-3) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 2 bits (values 0-3) of each element of indices is used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMPD, CPU Feature: AVX512
func (x Float64x8) Permute(indices Uint64x8) Float64x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Int64x8) Permute(indices Uint64x8) Int64x8
// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// The low 3 bits (values 0-7) of each element of indices is used
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// The low 3 bits (values 0-7) of each element of indices is used.
//
// Asm: VPERMQ, CPU Feature: AVX512
func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
@@ -4758,7 +4769,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
/* PermuteOrZero */
// PermuteOrZero performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set in which case zero is used instead.
//
@@ -4766,7 +4779,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16
// PermuteOrZero performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
+// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+//
// The lower four bits of each byte-sized index in indices select an element from x,
// unless the index's sign bit is set in which case zero is used instead.
//
@@ -4776,7 +4791,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16
/* PermuteOrZeroGrouped */
// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
-// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
+// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
@@ -4785,7 +4802,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16
func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32
// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
-// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
+// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
@@ -4794,7 +4813,9 @@ func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32
func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64
// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
-// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
+// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
@@ -4803,7 +4824,9 @@ func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64
func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32
// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices:
-// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
+// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...}
+//
// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x,
// unless the index's sign bit is set in which case zero is used instead.
// Each group is of size 128-bit.
@@ -4877,84 +4900,84 @@ func (x Float64x8) ReciprocalSqrt() Float64x8
/* RotateAllLeft */
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x4) RotateAllLeft(shift uint8) Int32x4
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x8) RotateAllLeft(shift uint8) Int32x8
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Int32x16) RotateAllLeft(shift uint8) Int32x16
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x2) RotateAllLeft(shift uint8) Int64x2
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x4) RotateAllLeft(shift uint8) Int64x4
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Int64x8) RotateAllLeft(shift uint8) Int64x8
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLD, CPU Feature: AVX512
func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPROLQ, CPU Feature: AVX512
func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
-// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
+// RotateAllLeft rotates each element to the left by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
@@ -4963,84 +4986,84 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
/* RotateAllRight */
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x4) RotateAllRight(shift uint8) Int32x4
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x8) RotateAllRight(shift uint8) Int32x8
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Int32x16) RotateAllRight(shift uint8) Int32x16
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x2) RotateAllRight(shift uint8) Int64x2
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x4) RotateAllRight(shift uint8) Int64x4
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Int64x8) RotateAllRight(shift uint8) Int64x8
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORD, CPU Feature: AVX512
func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPRORQ, CPU Feature: AVX512
func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
+// RotateAllRight rotates each element to the right by the number of bits specified by shift.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
@@ -5173,22 +5196,22 @@ func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
/* RoundToEven */
-// RoundToEven rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x4) RoundToEven() Float32x4
-// RoundToEven rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPS, CPU Feature: AVX
func (x Float32x8) RoundToEven() Float32x8
-// RoundToEven rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x2) RoundToEven() Float64x2
-// RoundToEven rounds elements to the nearest integer.
+// RoundToEven rounds elements to the nearest integer, rounding ties to even.
//
// Asm: VROUNDPD, CPU Feature: AVX
func (x Float64x4) RoundToEven() Float64x4
@@ -5365,334 +5388,304 @@ func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4
/* SaturateToInt8 */
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x8) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x16) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt8 converts element values to int8 with signed saturation.
//
// Asm: VPMOVSWB, CPU Feature: AVX512
func (x Int16x32) SaturateToInt8() Int8x32
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x4) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x8) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
//
// Asm: VPMOVSDB, CPU Feature: AVX512
func (x Int32x16) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x2) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x4) SaturateToInt8() Int8x16
-// SaturateToInt8 converts element values to int8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToInt8 converts element values to int8 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQB, CPU Feature: AVX512
func (x Int64x8) SaturateToInt8() Int8x16
/* SaturateToInt16 */
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x4) SaturateToInt16() Int16x8
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x8) SaturateToInt16() Int16x8
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
//
// Asm: VPMOVSDW, CPU Feature: AVX512
func (x Int32x16) SaturateToInt16() Int16x16
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x2) SaturateToInt16() Int16x8
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x4) SaturateToInt16() Int16x8
-// SaturateToInt16 converts element values to int16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16 converts element values to int16 with signed saturation.
//
// Asm: VPMOVSQW, CPU Feature: AVX512
func (x Int64x8) SaturateToInt16() Int16x8
/* SaturateToInt16Concat */
-// SaturateToInt16Concat converts element values to int16.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt16Concat converts element values to int16 with signed saturation.
+// The converted elements from x will be packed to the lower part of the result vector,
+// the converted elements from y will be packed to the upper part of the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX
func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
-// SaturateToInt16Concat converts element values to int16.
+/* SaturateToInt16ConcatGrouped */
+
+// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX2
-func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
+func (x Int32x8) SaturateToInt16ConcatGrouped(y Int32x8) Int16x16
-// SaturateToInt16Concat converts element values to int16.
+// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX512
-func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
+func (x Int32x16) SaturateToInt16ConcatGrouped(y Int32x16) Int16x32
/* SaturateToInt32 */
-// SaturateToInt32 converts element values to int32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt32 converts element values to int32 with signed saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x2) SaturateToInt32() Int32x4
-// SaturateToInt32 converts element values to int32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt32 converts element values to int32 with signed saturation.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x4) SaturateToInt32() Int32x4
-// SaturateToInt32 converts element values to int32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToInt32 converts element values to int32 with signed saturation.
//
// Asm: VPMOVSQD, CPU Feature: AVX512
func (x Int64x8) SaturateToInt32() Int32x8
/* SaturateToUint8 */
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVSWB, CPU Feature: AVX512
-func (x Int16x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x8) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
//
-// Asm: VPMOVSWB, CPU Feature: AVX512
-func (x Int16x16) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x16) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
//
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x4) SaturateToUint8() Int8x16
+// Asm: VPMOVUSWB, CPU Feature: AVX512
+func (x Uint16x32) SaturateToUint8() Uint8x32
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x4) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVSDB, CPU Feature: AVX512
-func (x Int32x16) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x8) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
//
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x2) SaturateToUint8() Int8x16
+// Asm: VPMOVUSDB, CPU Feature: AVX512
+func (x Uint32x16) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x4) SaturateToUint8() Int8x16
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x2) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVSQB, CPU Feature: AVX512
-func (x Int64x8) SaturateToUint8() Int8x16
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x4) SaturateToUint8() Uint8x16
-// SaturateToUint8 converts element values to uint8.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint8 converts element values to uint8 with unsigned saturation.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
-// Asm: VPMOVUSWB, CPU Feature: AVX512
-func (x Uint16x32) SaturateToUint8() Uint8x32
+// Asm: VPMOVUSQB, CPU Feature: AVX512
+func (x Uint64x8) SaturateToUint8() Uint8x16
/* SaturateToUint16 */
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
+// Results are packed to low elements in the returned vector; its upper elements are zeroed.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x4) SaturateToUint16() Uint16x8
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x8) SaturateToUint16() Uint16x8
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
//
// Asm: VPMOVUSDW, CPU Feature: AVX512
func (x Uint32x16) SaturateToUint16() Uint16x16
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
+// Results are packed to low elements in the returned vector; its upper elements are zeroed.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x2) SaturateToUint16() Uint16x8
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
+// Results are packed to low elements in the returned vector; its upper elements are zeroed.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x4) SaturateToUint16() Uint16x8
-// SaturateToUint16 converts element values to uint16.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16 converts element values to uint16 with unsigned saturation.
//
// Asm: VPMOVUSQW, CPU Feature: AVX512
func (x Uint64x8) SaturateToUint16() Uint16x8
/* SaturateToUint16Concat */
-// SaturateToUint16Concat converts element values to uint16.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
+// The converted elements from x will be packed to the lower part of the result vector,
+// the converted elements from y will be packed to the upper part of the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX
-func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
+func (x Int32x4) SaturateToUint16Concat(y Int32x4) Uint16x8
+
+/* SaturateToUint16ConcatGrouped */
-// SaturateToUint16Concat converts element values to uint16.
+// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX2
-func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
+func (x Int32x8) SaturateToUint16ConcatGrouped(y Int32x8) Uint16x16
-// SaturateToUint16Concat converts element values to uint16.
+// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
-// Conversion is done with saturation on the vector elements.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX512
-func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
+func (x Int32x16) SaturateToUint16ConcatGrouped(y Int32x16) Uint16x32
/* SaturateToUint32 */
-// SaturateToUint32 converts element values to uint32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint32 converts element values to uint32 with unsigned saturation.
+// Results are packed to low elements in the returned vector; its upper elements are zeroed.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x2) SaturateToUint32() Uint32x4
-// SaturateToUint32 converts element values to uint32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint32 converts element values to uint32 with unsigned saturation.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x4) SaturateToUint32() Uint32x4
-// SaturateToUint32 converts element values to uint32.
-// Conversion is done with saturation on the vector elements.
+// SaturateToUint32 converts element values to uint32 with unsigned saturation.
//
// Asm: VPMOVUSQD, CPU Feature: AVX512
func (x Uint64x8) SaturateToUint32() Uint32x8
/* Scale */
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x4) Scale(y Float32x4) Float32x4
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x8) Scale(y Float32x8) Float32x8
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPS, CPU Feature: AVX512
func (x Float32x16) Scale(y Float32x16) Float32x16
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x2) Scale(y Float64x2) Float64x2
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x4) Scale(y Float64x4) Float64x4
-// Scale multiplies elements by a power of 2.
+// Scale multiplies each element of x by 2 raised to the power of the
+// floor of the corresponding element in y.
//
// Asm: VSCALEFPD, CPU Feature: AVX512
func (x Float64x8) Scale(y Float64x8) Float64x8
@@ -6131,236 +6124,236 @@ func (x Uint64x8) SetLo(y Uint64x4) Uint64x8
/* ShiftAllLeft */
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX
func (x Int16x8) ShiftAllLeft(y uint64) Int16x8
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX2
func (x Int16x16) ShiftAllLeft(y uint64) Int16x16
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX512
func (x Int16x32) ShiftAllLeft(y uint64) Int16x32
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX
func (x Int32x4) ShiftAllLeft(y uint64) Int32x4
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX2
func (x Int32x8) ShiftAllLeft(y uint64) Int32x8
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX512
func (x Int32x16) ShiftAllLeft(y uint64) Int32x16
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX
func (x Int64x2) ShiftAllLeft(y uint64) Int64x2
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func (x Int64x4) ShiftAllLeft(y uint64) Int64x4
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func (x Int64x8) ShiftAllLeft(y uint64) Int64x8
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX
func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX2
func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLW, CPU Feature: AVX512
func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX
func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX2
func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLD, CPU Feature: AVX512
func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX
func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX2
func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4
-// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
+// ShiftAllLeft shifts each element to the left by y bits.
//
// Asm: VPSLLQ, CPU Feature: AVX512
func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8
/* ShiftAllLeftConcat */
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4
-// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
@@ -6369,236 +6362,236 @@ func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
/* ShiftAllRight */
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAW, CPU Feature: AVX
func (x Int16x8) ShiftAllRight(y uint64) Int16x8
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAW, CPU Feature: AVX2
func (x Int16x16) ShiftAllRight(y uint64) Int16x16
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAW, CPU Feature: AVX512
func (x Int16x32) ShiftAllRight(y uint64) Int16x32
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAD, CPU Feature: AVX
func (x Int32x4) ShiftAllRight(y uint64) Int32x4
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAD, CPU Feature: AVX2
func (x Int32x8) ShiftAllRight(y uint64) Int32x8
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAD, CPU Feature: AVX512
func (x Int32x16) ShiftAllRight(y uint64) Int32x16
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x2) ShiftAllRight(y uint64) Int64x2
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x4) ShiftAllRight(y uint64) Int64x4
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
+// ShiftAllRight performs a signed right shift on each element by y bits.
//
// Asm: VPSRAQ, CPU Feature: AVX512
func (x Int64x8) ShiftAllRight(y uint64) Int64x8
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLW, CPU Feature: AVX
func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLW, CPU Feature: AVX2
func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLW, CPU Feature: AVX512
func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLD, CPU Feature: AVX
func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLD, CPU Feature: AVX2
func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLD, CPU Feature: AVX512
func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLQ, CPU Feature: AVX
func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLQ, CPU Feature: AVX2
func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4
-// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRight performs an unsigned right shift on each element by y bits.
//
// Asm: VPSRLQ, CPU Feature: AVX512
func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8
/* ShiftAllRightConcat */
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by
+// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
//
// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
//
@@ -6607,92 +6600,92 @@ func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
/* ShiftLeft */
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x8) ShiftLeft(y Int16x8) Int16x8
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x16) ShiftLeft(y Int16x16) Int16x16
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Int16x32) ShiftLeft(y Int16x32) Int16x32
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x4) ShiftLeft(y Int32x4) Int32x4
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Int32x8) ShiftLeft(y Int32x8) Int32x8
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func (x Int32x16) ShiftLeft(y Int32x16) Int32x16
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x2) ShiftLeft(y Int64x2) Int64x2
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Int64x4) ShiftLeft(y Int64x4) Int64x4
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func (x Int64x8) ShiftLeft(y Int64x8) Int64x8
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVW, CPU Feature: AVX512
func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVD, CPU Feature: AVX512
func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4
-// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements.
//
// Asm: VPSLLVQ, CPU Feature: AVX512
func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
@@ -6700,201 +6693,201 @@ func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8
/* ShiftLeftConcat */
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
//
// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
/* ShiftRight */
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x8) ShiftRight(y Int16x8) Int16x8
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x16) ShiftRight(y Int16x16) Int16x16
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVW, CPU Feature: AVX512
func (x Int16x32) ShiftRight(y Int16x32) Int16x32
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x4) ShiftRight(y Int32x4) Int32x4
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVD, CPU Feature: AVX2
func (x Int32x8) ShiftRight(y Int32x8) Int32x8
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVD, CPU Feature: AVX512
func (x Int32x16) ShiftRight(y Int32x16) Int32x16
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x2) ShiftRight(y Int64x2) Int64x2
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x4) ShiftRight(y Int64x4) Int64x4
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRAVQ, CPU Feature: AVX512
func (x Int64x8) ShiftRight(y Int64x8) Int64x8
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVW, CPU Feature: AVX512
func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVD, CPU Feature: AVX2
func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVD, CPU Feature: AVX512
func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVQ, CPU Feature: AVX2
func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4
-// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements.
//
// Asm: VPSRLVQ, CPU Feature: AVX512
func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
@@ -6902,109 +6895,109 @@ func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8
/* ShiftRightConcat */
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
//
// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
@@ -7196,90 +7189,101 @@ func (x Uint64x8) Sub(y Uint64x8) Uint64x8
/* SubPairs */
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VHSUBPS, CPU Feature: AVX
func (x Float32x4) SubPairs(y Float32x4) Float32x4
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
-//
-// Asm: VHSUBPS, CPU Feature: AVX
-func (x Float32x8) SubPairs(y Float32x8) Float32x8
-
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1].
//
// Asm: VHSUBPD, CPU Feature: AVX
func (x Float64x2) SubPairs(y Float64x2) Float64x2
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
-// Asm: VHSUBPD, CPU Feature: AVX
-func (x Float64x4) SubPairs(y Float64x4) Float64x4
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Int16x8) SubPairs(y Int16x8) Int16x8
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Int16x8) SubPairs(y Int16x8) Int16x8
+// Asm: VPHSUBD, CPU Feature: AVX
+func (x Int32x4) SubPairs(y Int32x4) Int32x4
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
-// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Int16x16) SubPairs(y Int16x16) Int16x16
+// Asm: VPHSUBW, CPU Feature: AVX
+func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8
// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBD, CPU Feature: AVX
-func (x Int32x4) SubPairs(y Int32x4) Int32x4
+func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+/* SubPairsGrouped */
+
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
-// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Int32x8) SubPairs(y Int32x8) Int32x8
+// Asm: VHSUBPS, CPU Feature: AVX
+func (x Float32x8) SubPairsGrouped(y Float32x8) Float32x8
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1].
//
-// Asm: VPHSUBW, CPU Feature: AVX
-func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8
+// Asm: VHSUBPD, CPU Feature: AVX
+func (x Float64x4) SubPairsGrouped(y Float64x4) Float64x4
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBW, CPU Feature: AVX2
-func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16
+func (x Int16x16) SubPairsGrouped(y Int16x16) Int16x16
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
-// Asm: VPHSUBD, CPU Feature: AVX
-func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4
+// Asm: VPHSUBD, CPU Feature: AVX2
+func (x Int32x8) SubPairsGrouped(y Int32x8) Int32x8
-// SubPairs horizontally subtracts adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
+//
+// Asm: VPHSUBW, CPU Feature: AVX2
+func (x Uint16x16) SubPairsGrouped(y Uint16x16) Uint16x16
+
+// SubPairsGrouped horizontally subtracts adjacent pairs of elements.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBD, CPU Feature: AVX2
-func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8
+func (x Uint32x8) SubPairsGrouped(y Uint32x8) Uint32x8
/* SubPairsSaturated */
// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBSW, CPU Feature: AVX
func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8
-// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...].
+/* SubPairsSaturatedGrouped */
+
+// SubPairsSaturatedGrouped horizontally subtracts adjacent pairs of elements with saturation.
+// With each 128-bit as a group:
+// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...].
//
// Asm: VPHSUBSW, CPU Feature: AVX2
-func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16
+func (x Int16x16) SubPairsSaturatedGrouped(y Int16x16) Int16x16
/* SubSaturated */
@@ -7478,244 +7482,212 @@ func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
/* TruncateToInt8 */
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x8) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x16) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt8 truncates element values to int8.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Int16x32) TruncateToInt8() Int8x32
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x4) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x8) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Int32x16) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x2) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x4) TruncateToInt8() Int8x16
-// TruncateToInt8 converts element values to int8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToInt8 truncates element values to int8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Int64x8) TruncateToInt8() Int8x16
/* TruncateToInt16 */
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x4) TruncateToInt16() Int16x8
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x8) TruncateToInt16() Int16x8
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Int32x16) TruncateToInt16() Int16x16
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x2) TruncateToInt16() Int16x8
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x4) TruncateToInt16() Int16x8
-// TruncateToInt16 converts element values to int16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt16 truncates element values to int16.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Int64x8) TruncateToInt16() Int16x8
/* TruncateToInt32 */
-// TruncateToInt32 converts element values to int32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt32 truncates element values to int32.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x2) TruncateToInt32() Int32x4
-// TruncateToInt32 converts element values to int32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt32 truncates element values to int32.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x4) TruncateToInt32() Int32x4
-// TruncateToInt32 converts element values to int32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToInt32 truncates element values to int32.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Int64x8) TruncateToInt32() Int32x8
/* TruncateToUint8 */
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x8) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x16) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint8 truncates element values to uint8.
//
// Asm: VPMOVWB, CPU Feature: AVX512
func (x Uint16x32) TruncateToUint8() Uint8x32
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x4) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x8) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
//
// Asm: VPMOVDB, CPU Feature: AVX512
func (x Uint32x16) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint8() Uint8x16
-// TruncateToUint8 converts element values to uint8.
-// Conversion is done with truncation on the vector elements.
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared.
+// TruncateToUint8 truncates element values to uint8.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQB, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint8() Uint8x16
/* TruncateToUint16 */
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x4) TruncateToUint16() Uint16x8
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x8) TruncateToUint16() Uint16x8
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
//
// Asm: VPMOVDW, CPU Feature: AVX512
func (x Uint32x16) TruncateToUint16() Uint16x16
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint16() Uint16x8
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint16() Uint16x8
-// TruncateToUint16 converts element values to uint16.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint16 truncates element values to uint16.
//
// Asm: VPMOVQW, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint16() Uint16x8
/* TruncateToUint32 */
-// TruncateToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint32 truncates element values to uint32.
+// Results are packed to low elements in the returned vector, its upper elements are zeroed.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x2) TruncateToUint32() Uint32x4
-// TruncateToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint32 truncates element values to uint32.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x4) TruncateToUint32() Uint32x4
-// TruncateToUint32 converts element values to uint32.
-// Conversion is done with truncation on the vector elements.
+// TruncateToUint32 truncates element values to uint32.
//
// Asm: VPMOVQD, CPU Feature: AVX512
func (x Uint64x8) TruncateToUint32() Uint32x8
@@ -7842,930 +7814,930 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4
// Asm: VPXORQ, CPU Feature: AVX512
func (x Uint64x8) Xor(y Uint64x8) Uint64x8
-// Float64x2 converts from Float32x4 to Float64x2
-func (from Float32x4) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Float32x4) AsFloat64x2() Float64x2
-// Int8x16 converts from Float32x4 to Int8x16
-func (from Float32x4) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Float32x4) AsInt8x16() Int8x16
-// Int16x8 converts from Float32x4 to Int16x8
-func (from Float32x4) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Float32x4) AsInt16x8() Int16x8
-// Int32x4 converts from Float32x4 to Int32x4
-func (from Float32x4) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Float32x4) AsInt32x4() Int32x4
-// Int64x2 converts from Float32x4 to Int64x2
-func (from Float32x4) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Float32x4) AsInt64x2() Int64x2
-// Uint8x16 converts from Float32x4 to Uint8x16
-func (from Float32x4) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Float32x4) AsUint8x16() Uint8x16
-// Uint16x8 converts from Float32x4 to Uint16x8
-func (from Float32x4) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Float32x4) AsUint16x8() Uint16x8
-// Uint32x4 converts from Float32x4 to Uint32x4
-func (from Float32x4) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Float32x4) AsUint32x4() Uint32x4
-// Uint64x2 converts from Float32x4 to Uint64x2
-func (from Float32x4) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Float32x4) AsUint64x2() Uint64x2
-// Float64x4 converts from Float32x8 to Float64x4
-func (from Float32x8) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Float32x8) AsFloat64x4() Float64x4
-// Int8x32 converts from Float32x8 to Int8x32
-func (from Float32x8) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Float32x8) AsInt8x32() Int8x32
-// Int16x16 converts from Float32x8 to Int16x16
-func (from Float32x8) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Float32x8) AsInt16x16() Int16x16
-// Int32x8 converts from Float32x8 to Int32x8
-func (from Float32x8) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Float32x8) AsInt32x8() Int32x8
-// Int64x4 converts from Float32x8 to Int64x4
-func (from Float32x8) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Float32x8) AsInt64x4() Int64x4
-// Uint8x32 converts from Float32x8 to Uint8x32
-func (from Float32x8) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Float32x8) AsUint8x32() Uint8x32
-// Uint16x16 converts from Float32x8 to Uint16x16
-func (from Float32x8) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Float32x8) AsUint16x16() Uint16x16
-// Uint32x8 converts from Float32x8 to Uint32x8
-func (from Float32x8) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Float32x8) AsUint32x8() Uint32x8
-// Uint64x4 converts from Float32x8 to Uint64x4
-func (from Float32x8) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Float32x8) AsUint64x4() Uint64x4
-// Float64x8 converts from Float32x16 to Float64x8
-func (from Float32x16) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Float32x16) AsFloat64x8() Float64x8
-// Int8x64 converts from Float32x16 to Int8x64
-func (from Float32x16) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Float32x16) AsInt8x64() Int8x64
-// Int16x32 converts from Float32x16 to Int16x32
-func (from Float32x16) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Float32x16) AsInt16x32() Int16x32
-// Int32x16 converts from Float32x16 to Int32x16
-func (from Float32x16) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Float32x16) AsInt32x16() Int32x16
-// Int64x8 converts from Float32x16 to Int64x8
-func (from Float32x16) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Float32x16) AsInt64x8() Int64x8
-// Uint8x64 converts from Float32x16 to Uint8x64
-func (from Float32x16) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Float32x16) AsUint8x64() Uint8x64
-// Uint16x32 converts from Float32x16 to Uint16x32
-func (from Float32x16) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Float32x16) AsUint16x32() Uint16x32
-// Uint32x16 converts from Float32x16 to Uint32x16
-func (from Float32x16) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Float32x16) AsUint32x16() Uint32x16
-// Uint64x8 converts from Float32x16 to Uint64x8
-func (from Float32x16) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Float32x16) AsUint64x8() Uint64x8
-// Float32x4 converts from Float64x2 to Float32x4
-func (from Float64x2) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Float64x2) AsFloat32x4() Float32x4
-// Int8x16 converts from Float64x2 to Int8x16
-func (from Float64x2) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Float64x2) AsInt8x16() Int8x16
-// Int16x8 converts from Float64x2 to Int16x8
-func (from Float64x2) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Float64x2) AsInt16x8() Int16x8
-// Int32x4 converts from Float64x2 to Int32x4
-func (from Float64x2) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Float64x2) AsInt32x4() Int32x4
-// Int64x2 converts from Float64x2 to Int64x2
-func (from Float64x2) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Float64x2) AsInt64x2() Int64x2
-// Uint8x16 converts from Float64x2 to Uint8x16
-func (from Float64x2) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Float64x2) AsUint8x16() Uint8x16
-// Uint16x8 converts from Float64x2 to Uint16x8
-func (from Float64x2) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Float64x2) AsUint16x8() Uint16x8
-// Uint32x4 converts from Float64x2 to Uint32x4
-func (from Float64x2) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Float64x2) AsUint32x4() Uint32x4
-// Uint64x2 converts from Float64x2 to Uint64x2
-func (from Float64x2) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Float64x2) AsUint64x2() Uint64x2
-// Float32x8 converts from Float64x4 to Float32x8
-func (from Float64x4) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Float64x4) AsFloat32x8() Float32x8
-// Int8x32 converts from Float64x4 to Int8x32
-func (from Float64x4) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Float64x4) AsInt8x32() Int8x32
-// Int16x16 converts from Float64x4 to Int16x16
-func (from Float64x4) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Float64x4) AsInt16x16() Int16x16
-// Int32x8 converts from Float64x4 to Int32x8
-func (from Float64x4) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Float64x4) AsInt32x8() Int32x8
-// Int64x4 converts from Float64x4 to Int64x4
-func (from Float64x4) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Float64x4) AsInt64x4() Int64x4
-// Uint8x32 converts from Float64x4 to Uint8x32
-func (from Float64x4) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Float64x4) AsUint8x32() Uint8x32
-// Uint16x16 converts from Float64x4 to Uint16x16
-func (from Float64x4) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Float64x4) AsUint16x16() Uint16x16
-// Uint32x8 converts from Float64x4 to Uint32x8
-func (from Float64x4) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Float64x4) AsUint32x8() Uint32x8
-// Uint64x4 converts from Float64x4 to Uint64x4
-func (from Float64x4) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Float64x4) AsUint64x4() Uint64x4
-// Float32x16 converts from Float64x8 to Float32x16
-func (from Float64x8) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Float64x8) AsFloat32x16() Float32x16
-// Int8x64 converts from Float64x8 to Int8x64
-func (from Float64x8) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Float64x8) AsInt8x64() Int8x64
-// Int16x32 converts from Float64x8 to Int16x32
-func (from Float64x8) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Float64x8) AsInt16x32() Int16x32
-// Int32x16 converts from Float64x8 to Int32x16
-func (from Float64x8) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Float64x8) AsInt32x16() Int32x16
-// Int64x8 converts from Float64x8 to Int64x8
-func (from Float64x8) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Float64x8) AsInt64x8() Int64x8
-// Uint8x64 converts from Float64x8 to Uint8x64
-func (from Float64x8) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Float64x8) AsUint8x64() Uint8x64
-// Uint16x32 converts from Float64x8 to Uint16x32
-func (from Float64x8) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Float64x8) AsUint16x32() Uint16x32
-// Uint32x16 converts from Float64x8 to Uint32x16
-func (from Float64x8) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Float64x8) AsUint32x16() Uint32x16
-// Uint64x8 converts from Float64x8 to Uint64x8
-func (from Float64x8) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Float64x8) AsUint64x8() Uint64x8
-// Float32x4 converts from Int8x16 to Float32x4
-func (from Int8x16) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Int8x16) AsFloat32x4() Float32x4
-// Float64x2 converts from Int8x16 to Float64x2
-func (from Int8x16) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Int8x16) AsFloat64x2() Float64x2
-// Int16x8 converts from Int8x16 to Int16x8
-func (from Int8x16) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Int8x16) AsInt16x8() Int16x8
-// Int32x4 converts from Int8x16 to Int32x4
-func (from Int8x16) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Int8x16) AsInt32x4() Int32x4
-// Int64x2 converts from Int8x16 to Int64x2
-func (from Int8x16) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Int8x16) AsInt64x2() Int64x2
-// Uint8x16 converts from Int8x16 to Uint8x16
-func (from Int8x16) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Int8x16) AsUint8x16() Uint8x16
-// Uint16x8 converts from Int8x16 to Uint16x8
-func (from Int8x16) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Int8x16) AsUint16x8() Uint16x8
-// Uint32x4 converts from Int8x16 to Uint32x4
-func (from Int8x16) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Int8x16) AsUint32x4() Uint32x4
-// Uint64x2 converts from Int8x16 to Uint64x2
-func (from Int8x16) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Int8x16) AsUint64x2() Uint64x2
-// Float32x8 converts from Int8x32 to Float32x8
-func (from Int8x32) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Int8x32) AsFloat32x8() Float32x8
-// Float64x4 converts from Int8x32 to Float64x4
-func (from Int8x32) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Int8x32) AsFloat64x4() Float64x4
-// Int16x16 converts from Int8x32 to Int16x16
-func (from Int8x32) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Int8x32) AsInt16x16() Int16x16
-// Int32x8 converts from Int8x32 to Int32x8
-func (from Int8x32) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Int8x32) AsInt32x8() Int32x8
-// Int64x4 converts from Int8x32 to Int64x4
-func (from Int8x32) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Int8x32) AsInt64x4() Int64x4
-// Uint8x32 converts from Int8x32 to Uint8x32
-func (from Int8x32) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Int8x32) AsUint8x32() Uint8x32
-// Uint16x16 converts from Int8x32 to Uint16x16
-func (from Int8x32) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Int8x32) AsUint16x16() Uint16x16
-// Uint32x8 converts from Int8x32 to Uint32x8
-func (from Int8x32) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Int8x32) AsUint32x8() Uint32x8
-// Uint64x4 converts from Int8x32 to Uint64x4
-func (from Int8x32) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Int8x32) AsUint64x4() Uint64x4
-// Float32x16 converts from Int8x64 to Float32x16
-func (from Int8x64) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Int8x64) AsFloat32x16() Float32x16
-// Float64x8 converts from Int8x64 to Float64x8
-func (from Int8x64) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Int8x64) AsFloat64x8() Float64x8
-// Int16x32 converts from Int8x64 to Int16x32
-func (from Int8x64) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Int8x64) AsInt16x32() Int16x32
-// Int32x16 converts from Int8x64 to Int32x16
-func (from Int8x64) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Int8x64) AsInt32x16() Int32x16
-// Int64x8 converts from Int8x64 to Int64x8
-func (from Int8x64) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Int8x64) AsInt64x8() Int64x8
-// Uint8x64 converts from Int8x64 to Uint8x64
-func (from Int8x64) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Int8x64) AsUint8x64() Uint8x64
-// Uint16x32 converts from Int8x64 to Uint16x32
-func (from Int8x64) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Int8x64) AsUint16x32() Uint16x32
-// Uint32x16 converts from Int8x64 to Uint32x16
-func (from Int8x64) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Int8x64) AsUint32x16() Uint32x16
-// Uint64x8 converts from Int8x64 to Uint64x8
-func (from Int8x64) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Int8x64) AsUint64x8() Uint64x8
-// Float32x4 converts from Int16x8 to Float32x4
-func (from Int16x8) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Int16x8) AsFloat32x4() Float32x4
-// Float64x2 converts from Int16x8 to Float64x2
-func (from Int16x8) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Int16x8) AsFloat64x2() Float64x2
-// Int8x16 converts from Int16x8 to Int8x16
-func (from Int16x8) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Int16x8) AsInt8x16() Int8x16
-// Int32x4 converts from Int16x8 to Int32x4
-func (from Int16x8) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Int16x8) AsInt32x4() Int32x4
-// Int64x2 converts from Int16x8 to Int64x2
-func (from Int16x8) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Int16x8) AsInt64x2() Int64x2
-// Uint8x16 converts from Int16x8 to Uint8x16
-func (from Int16x8) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Int16x8) AsUint8x16() Uint8x16
-// Uint16x8 converts from Int16x8 to Uint16x8
-func (from Int16x8) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Int16x8) AsUint16x8() Uint16x8
-// Uint32x4 converts from Int16x8 to Uint32x4
-func (from Int16x8) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Int16x8) AsUint32x4() Uint32x4
-// Uint64x2 converts from Int16x8 to Uint64x2
-func (from Int16x8) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Int16x8) AsUint64x2() Uint64x2
-// Float32x8 converts from Int16x16 to Float32x8
-func (from Int16x16) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Int16x16) AsFloat32x8() Float32x8
-// Float64x4 converts from Int16x16 to Float64x4
-func (from Int16x16) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Int16x16) AsFloat64x4() Float64x4
-// Int8x32 converts from Int16x16 to Int8x32
-func (from Int16x16) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Int16x16) AsInt8x32() Int8x32
-// Int32x8 converts from Int16x16 to Int32x8
-func (from Int16x16) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Int16x16) AsInt32x8() Int32x8
-// Int64x4 converts from Int16x16 to Int64x4
-func (from Int16x16) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Int16x16) AsInt64x4() Int64x4
-// Uint8x32 converts from Int16x16 to Uint8x32
-func (from Int16x16) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Int16x16) AsUint8x32() Uint8x32
-// Uint16x16 converts from Int16x16 to Uint16x16
-func (from Int16x16) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Int16x16) AsUint16x16() Uint16x16
-// Uint32x8 converts from Int16x16 to Uint32x8
-func (from Int16x16) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Int16x16) AsUint32x8() Uint32x8
-// Uint64x4 converts from Int16x16 to Uint64x4
-func (from Int16x16) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Int16x16) AsUint64x4() Uint64x4
-// Float32x16 converts from Int16x32 to Float32x16
-func (from Int16x32) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Int16x32) AsFloat32x16() Float32x16
-// Float64x8 converts from Int16x32 to Float64x8
-func (from Int16x32) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Int16x32) AsFloat64x8() Float64x8
-// Int8x64 converts from Int16x32 to Int8x64
-func (from Int16x32) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Int16x32) AsInt8x64() Int8x64
-// Int32x16 converts from Int16x32 to Int32x16
-func (from Int16x32) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Int16x32) AsInt32x16() Int32x16
-// Int64x8 converts from Int16x32 to Int64x8
-func (from Int16x32) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Int16x32) AsInt64x8() Int64x8
-// Uint8x64 converts from Int16x32 to Uint8x64
-func (from Int16x32) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Int16x32) AsUint8x64() Uint8x64
-// Uint16x32 converts from Int16x32 to Uint16x32
-func (from Int16x32) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Int16x32) AsUint16x32() Uint16x32
-// Uint32x16 converts from Int16x32 to Uint32x16
-func (from Int16x32) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Int16x32) AsUint32x16() Uint32x16
-// Uint64x8 converts from Int16x32 to Uint64x8
-func (from Int16x32) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Int16x32) AsUint64x8() Uint64x8
-// Float32x4 converts from Int32x4 to Float32x4
-func (from Int32x4) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Int32x4) AsFloat32x4() Float32x4
-// Float64x2 converts from Int32x4 to Float64x2
-func (from Int32x4) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Int32x4) AsFloat64x2() Float64x2
-// Int8x16 converts from Int32x4 to Int8x16
-func (from Int32x4) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Int32x4) AsInt8x16() Int8x16
-// Int16x8 converts from Int32x4 to Int16x8
-func (from Int32x4) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Int32x4) AsInt16x8() Int16x8
-// Int64x2 converts from Int32x4 to Int64x2
-func (from Int32x4) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Int32x4) AsInt64x2() Int64x2
-// Uint8x16 converts from Int32x4 to Uint8x16
-func (from Int32x4) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Int32x4) AsUint8x16() Uint8x16
-// Uint16x8 converts from Int32x4 to Uint16x8
-func (from Int32x4) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Int32x4) AsUint16x8() Uint16x8
-// Uint32x4 converts from Int32x4 to Uint32x4
-func (from Int32x4) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Int32x4) AsUint32x4() Uint32x4
-// Uint64x2 converts from Int32x4 to Uint64x2
-func (from Int32x4) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Int32x4) AsUint64x2() Uint64x2
-// Float32x8 converts from Int32x8 to Float32x8
-func (from Int32x8) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Int32x8) AsFloat32x8() Float32x8
-// Float64x4 converts from Int32x8 to Float64x4
-func (from Int32x8) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Int32x8) AsFloat64x4() Float64x4
-// Int8x32 converts from Int32x8 to Int8x32
-func (from Int32x8) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Int32x8) AsInt8x32() Int8x32
-// Int16x16 converts from Int32x8 to Int16x16
-func (from Int32x8) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Int32x8) AsInt16x16() Int16x16
-// Int64x4 converts from Int32x8 to Int64x4
-func (from Int32x8) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Int32x8) AsInt64x4() Int64x4
-// Uint8x32 converts from Int32x8 to Uint8x32
-func (from Int32x8) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Int32x8) AsUint8x32() Uint8x32
-// Uint16x16 converts from Int32x8 to Uint16x16
-func (from Int32x8) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Int32x8) AsUint16x16() Uint16x16
-// Uint32x8 converts from Int32x8 to Uint32x8
-func (from Int32x8) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Int32x8) AsUint32x8() Uint32x8
-// Uint64x4 converts from Int32x8 to Uint64x4
-func (from Int32x8) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Int32x8) AsUint64x4() Uint64x4
-// Float32x16 converts from Int32x16 to Float32x16
-func (from Int32x16) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Int32x16) AsFloat32x16() Float32x16
-// Float64x8 converts from Int32x16 to Float64x8
-func (from Int32x16) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Int32x16) AsFloat64x8() Float64x8
-// Int8x64 converts from Int32x16 to Int8x64
-func (from Int32x16) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Int32x16) AsInt8x64() Int8x64
-// Int16x32 converts from Int32x16 to Int16x32
-func (from Int32x16) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Int32x16) AsInt16x32() Int16x32
-// Int64x8 converts from Int32x16 to Int64x8
-func (from Int32x16) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Int32x16) AsInt64x8() Int64x8
-// Uint8x64 converts from Int32x16 to Uint8x64
-func (from Int32x16) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Int32x16) AsUint8x64() Uint8x64
-// Uint16x32 converts from Int32x16 to Uint16x32
-func (from Int32x16) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Int32x16) AsUint16x32() Uint16x32
-// Uint32x16 converts from Int32x16 to Uint32x16
-func (from Int32x16) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Int32x16) AsUint32x16() Uint32x16
-// Uint64x8 converts from Int32x16 to Uint64x8
-func (from Int32x16) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Int32x16) AsUint64x8() Uint64x8
-// Float32x4 converts from Int64x2 to Float32x4
-func (from Int64x2) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Int64x2) AsFloat32x4() Float32x4
-// Float64x2 converts from Int64x2 to Float64x2
-func (from Int64x2) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Int64x2) AsFloat64x2() Float64x2
-// Int8x16 converts from Int64x2 to Int8x16
-func (from Int64x2) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Int64x2) AsInt8x16() Int8x16
-// Int16x8 converts from Int64x2 to Int16x8
-func (from Int64x2) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Int64x2) AsInt16x8() Int16x8
-// Int32x4 converts from Int64x2 to Int32x4
-func (from Int64x2) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Int64x2) AsInt32x4() Int32x4
-// Uint8x16 converts from Int64x2 to Uint8x16
-func (from Int64x2) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Int64x2) AsUint8x16() Uint8x16
-// Uint16x8 converts from Int64x2 to Uint16x8
-func (from Int64x2) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Int64x2) AsUint16x8() Uint16x8
-// Uint32x4 converts from Int64x2 to Uint32x4
-func (from Int64x2) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Int64x2) AsUint32x4() Uint32x4
-// Uint64x2 converts from Int64x2 to Uint64x2
-func (from Int64x2) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Int64x2) AsUint64x2() Uint64x2
-// Float32x8 converts from Int64x4 to Float32x8
-func (from Int64x4) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Int64x4) AsFloat32x8() Float32x8
-// Float64x4 converts from Int64x4 to Float64x4
-func (from Int64x4) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Int64x4) AsFloat64x4() Float64x4
-// Int8x32 converts from Int64x4 to Int8x32
-func (from Int64x4) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Int64x4) AsInt8x32() Int8x32
-// Int16x16 converts from Int64x4 to Int16x16
-func (from Int64x4) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Int64x4) AsInt16x16() Int16x16
-// Int32x8 converts from Int64x4 to Int32x8
-func (from Int64x4) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Int64x4) AsInt32x8() Int32x8
-// Uint8x32 converts from Int64x4 to Uint8x32
-func (from Int64x4) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Int64x4) AsUint8x32() Uint8x32
-// Uint16x16 converts from Int64x4 to Uint16x16
-func (from Int64x4) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Int64x4) AsUint16x16() Uint16x16
-// Uint32x8 converts from Int64x4 to Uint32x8
-func (from Int64x4) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Int64x4) AsUint32x8() Uint32x8
-// Uint64x4 converts from Int64x4 to Uint64x4
-func (from Int64x4) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Int64x4) AsUint64x4() Uint64x4
-// Float32x16 converts from Int64x8 to Float32x16
-func (from Int64x8) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Int64x8) AsFloat32x16() Float32x16
-// Float64x8 converts from Int64x8 to Float64x8
-func (from Int64x8) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Int64x8) AsFloat64x8() Float64x8
-// Int8x64 converts from Int64x8 to Int8x64
-func (from Int64x8) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Int64x8) AsInt8x64() Int8x64
-// Int16x32 converts from Int64x8 to Int16x32
-func (from Int64x8) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Int64x8) AsInt16x32() Int16x32
-// Int32x16 converts from Int64x8 to Int32x16
-func (from Int64x8) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Int64x8) AsInt32x16() Int32x16
-// Uint8x64 converts from Int64x8 to Uint8x64
-func (from Int64x8) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Int64x8) AsUint8x64() Uint8x64
-// Uint16x32 converts from Int64x8 to Uint16x32
-func (from Int64x8) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Int64x8) AsUint16x32() Uint16x32
-// Uint32x16 converts from Int64x8 to Uint32x16
-func (from Int64x8) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Int64x8) AsUint32x16() Uint32x16
-// Uint64x8 converts from Int64x8 to Uint64x8
-func (from Int64x8) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Int64x8) AsUint64x8() Uint64x8
-// Float32x4 converts from Uint8x16 to Float32x4
-func (from Uint8x16) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Uint8x16) AsFloat32x4() Float32x4
-// Float64x2 converts from Uint8x16 to Float64x2
-func (from Uint8x16) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Uint8x16) AsFloat64x2() Float64x2
-// Int8x16 converts from Uint8x16 to Int8x16
-func (from Uint8x16) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Uint8x16) AsInt8x16() Int8x16
-// Int16x8 converts from Uint8x16 to Int16x8
-func (from Uint8x16) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Uint8x16) AsInt16x8() Int16x8
-// Int32x4 converts from Uint8x16 to Int32x4
-func (from Uint8x16) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Uint8x16) AsInt32x4() Int32x4
-// Int64x2 converts from Uint8x16 to Int64x2
-func (from Uint8x16) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Uint8x16) AsInt64x2() Int64x2
-// Uint16x8 converts from Uint8x16 to Uint16x8
-func (from Uint8x16) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Uint8x16) AsUint16x8() Uint16x8
-// Uint32x4 converts from Uint8x16 to Uint32x4
-func (from Uint8x16) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Uint8x16) AsUint32x4() Uint32x4
-// Uint64x2 converts from Uint8x16 to Uint64x2
-func (from Uint8x16) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Uint8x16) AsUint64x2() Uint64x2
-// Float32x8 converts from Uint8x32 to Float32x8
-func (from Uint8x32) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Uint8x32) AsFloat32x8() Float32x8
-// Float64x4 converts from Uint8x32 to Float64x4
-func (from Uint8x32) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Uint8x32) AsFloat64x4() Float64x4
-// Int8x32 converts from Uint8x32 to Int8x32
-func (from Uint8x32) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Uint8x32) AsInt8x32() Int8x32
-// Int16x16 converts from Uint8x32 to Int16x16
-func (from Uint8x32) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Uint8x32) AsInt16x16() Int16x16
-// Int32x8 converts from Uint8x32 to Int32x8
-func (from Uint8x32) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Uint8x32) AsInt32x8() Int32x8
-// Int64x4 converts from Uint8x32 to Int64x4
-func (from Uint8x32) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Uint8x32) AsInt64x4() Int64x4
-// Uint16x16 converts from Uint8x32 to Uint16x16
-func (from Uint8x32) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Uint8x32) AsUint16x16() Uint16x16
-// Uint32x8 converts from Uint8x32 to Uint32x8
-func (from Uint8x32) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Uint8x32) AsUint32x8() Uint32x8
-// Uint64x4 converts from Uint8x32 to Uint64x4
-func (from Uint8x32) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Uint8x32) AsUint64x4() Uint64x4
-// Float32x16 converts from Uint8x64 to Float32x16
-func (from Uint8x64) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Uint8x64) AsFloat32x16() Float32x16
-// Float64x8 converts from Uint8x64 to Float64x8
-func (from Uint8x64) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Uint8x64) AsFloat64x8() Float64x8
-// Int8x64 converts from Uint8x64 to Int8x64
-func (from Uint8x64) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Uint8x64) AsInt8x64() Int8x64
-// Int16x32 converts from Uint8x64 to Int16x32
-func (from Uint8x64) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Uint8x64) AsInt16x32() Int16x32
-// Int32x16 converts from Uint8x64 to Int32x16
-func (from Uint8x64) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Uint8x64) AsInt32x16() Int32x16
-// Int64x8 converts from Uint8x64 to Int64x8
-func (from Uint8x64) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Uint8x64) AsInt64x8() Int64x8
-// Uint16x32 converts from Uint8x64 to Uint16x32
-func (from Uint8x64) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Uint8x64) AsUint16x32() Uint16x32
-// Uint32x16 converts from Uint8x64 to Uint32x16
-func (from Uint8x64) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Uint8x64) AsUint32x16() Uint32x16
-// Uint64x8 converts from Uint8x64 to Uint64x8
-func (from Uint8x64) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Uint8x64) AsUint64x8() Uint64x8
-// Float32x4 converts from Uint16x8 to Float32x4
-func (from Uint16x8) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Uint16x8) AsFloat32x4() Float32x4
-// Float64x2 converts from Uint16x8 to Float64x2
-func (from Uint16x8) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Uint16x8) AsFloat64x2() Float64x2
-// Int8x16 converts from Uint16x8 to Int8x16
-func (from Uint16x8) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Uint16x8) AsInt8x16() Int8x16
-// Int16x8 converts from Uint16x8 to Int16x8
-func (from Uint16x8) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Uint16x8) AsInt16x8() Int16x8
-// Int32x4 converts from Uint16x8 to Int32x4
-func (from Uint16x8) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Uint16x8) AsInt32x4() Int32x4
-// Int64x2 converts from Uint16x8 to Int64x2
-func (from Uint16x8) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Uint16x8) AsInt64x2() Int64x2
-// Uint8x16 converts from Uint16x8 to Uint8x16
-func (from Uint16x8) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Uint16x8) AsUint8x16() Uint8x16
-// Uint32x4 converts from Uint16x8 to Uint32x4
-func (from Uint16x8) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Uint16x8) AsUint32x4() Uint32x4
-// Uint64x2 converts from Uint16x8 to Uint64x2
-func (from Uint16x8) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Uint16x8) AsUint64x2() Uint64x2
-// Float32x8 converts from Uint16x16 to Float32x8
-func (from Uint16x16) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Uint16x16) AsFloat32x8() Float32x8
-// Float64x4 converts from Uint16x16 to Float64x4
-func (from Uint16x16) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Uint16x16) AsFloat64x4() Float64x4
-// Int8x32 converts from Uint16x16 to Int8x32
-func (from Uint16x16) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Uint16x16) AsInt8x32() Int8x32
-// Int16x16 converts from Uint16x16 to Int16x16
-func (from Uint16x16) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Uint16x16) AsInt16x16() Int16x16
-// Int32x8 converts from Uint16x16 to Int32x8
-func (from Uint16x16) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Uint16x16) AsInt32x8() Int32x8
-// Int64x4 converts from Uint16x16 to Int64x4
-func (from Uint16x16) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Uint16x16) AsInt64x4() Int64x4
-// Uint8x32 converts from Uint16x16 to Uint8x32
-func (from Uint16x16) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Uint16x16) AsUint8x32() Uint8x32
-// Uint32x8 converts from Uint16x16 to Uint32x8
-func (from Uint16x16) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Uint16x16) AsUint32x8() Uint32x8
-// Uint64x4 converts from Uint16x16 to Uint64x4
-func (from Uint16x16) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Uint16x16) AsUint64x4() Uint64x4
-// Float32x16 converts from Uint16x32 to Float32x16
-func (from Uint16x32) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Uint16x32) AsFloat32x16() Float32x16
-// Float64x8 converts from Uint16x32 to Float64x8
-func (from Uint16x32) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Uint16x32) AsFloat64x8() Float64x8
-// Int8x64 converts from Uint16x32 to Int8x64
-func (from Uint16x32) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Uint16x32) AsInt8x64() Int8x64
-// Int16x32 converts from Uint16x32 to Int16x32
-func (from Uint16x32) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Uint16x32) AsInt16x32() Int16x32
-// Int32x16 converts from Uint16x32 to Int32x16
-func (from Uint16x32) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Uint16x32) AsInt32x16() Int32x16
-// Int64x8 converts from Uint16x32 to Int64x8
-func (from Uint16x32) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Uint16x32) AsInt64x8() Int64x8
-// Uint8x64 converts from Uint16x32 to Uint8x64
-func (from Uint16x32) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Uint16x32) AsUint8x64() Uint8x64
-// Uint32x16 converts from Uint16x32 to Uint32x16
-func (from Uint16x32) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Uint16x32) AsUint32x16() Uint32x16
-// Uint64x8 converts from Uint16x32 to Uint64x8
-func (from Uint16x32) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Uint16x32) AsUint64x8() Uint64x8
-// Float32x4 converts from Uint32x4 to Float32x4
-func (from Uint32x4) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Uint32x4) AsFloat32x4() Float32x4
-// Float64x2 converts from Uint32x4 to Float64x2
-func (from Uint32x4) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Uint32x4) AsFloat64x2() Float64x2
-// Int8x16 converts from Uint32x4 to Int8x16
-func (from Uint32x4) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Uint32x4) AsInt8x16() Int8x16
-// Int16x8 converts from Uint32x4 to Int16x8
-func (from Uint32x4) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Uint32x4) AsInt16x8() Int16x8
-// Int32x4 converts from Uint32x4 to Int32x4
-func (from Uint32x4) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Uint32x4) AsInt32x4() Int32x4
-// Int64x2 converts from Uint32x4 to Int64x2
-func (from Uint32x4) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Uint32x4) AsInt64x2() Int64x2
-// Uint8x16 converts from Uint32x4 to Uint8x16
-func (from Uint32x4) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Uint32x4) AsUint8x16() Uint8x16
-// Uint16x8 converts from Uint32x4 to Uint16x8
-func (from Uint32x4) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Uint32x4) AsUint16x8() Uint16x8
-// Uint64x2 converts from Uint32x4 to Uint64x2
-func (from Uint32x4) AsUint64x2() (to Uint64x2)
+// AsUint64x2 returns a Uint64x2 with the same bit representation as x.
+func (x Uint32x4) AsUint64x2() Uint64x2
-// Float32x8 converts from Uint32x8 to Float32x8
-func (from Uint32x8) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Uint32x8) AsFloat32x8() Float32x8
-// Float64x4 converts from Uint32x8 to Float64x4
-func (from Uint32x8) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Uint32x8) AsFloat64x4() Float64x4
-// Int8x32 converts from Uint32x8 to Int8x32
-func (from Uint32x8) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Uint32x8) AsInt8x32() Int8x32
-// Int16x16 converts from Uint32x8 to Int16x16
-func (from Uint32x8) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Uint32x8) AsInt16x16() Int16x16
-// Int32x8 converts from Uint32x8 to Int32x8
-func (from Uint32x8) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Uint32x8) AsInt32x8() Int32x8
-// Int64x4 converts from Uint32x8 to Int64x4
-func (from Uint32x8) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Uint32x8) AsInt64x4() Int64x4
-// Uint8x32 converts from Uint32x8 to Uint8x32
-func (from Uint32x8) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Uint32x8) AsUint8x32() Uint8x32
-// Uint16x16 converts from Uint32x8 to Uint16x16
-func (from Uint32x8) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Uint32x8) AsUint16x16() Uint16x16
-// Uint64x4 converts from Uint32x8 to Uint64x4
-func (from Uint32x8) AsUint64x4() (to Uint64x4)
+// AsUint64x4 returns a Uint64x4 with the same bit representation as x.
+func (x Uint32x8) AsUint64x4() Uint64x4
-// Float32x16 converts from Uint32x16 to Float32x16
-func (from Uint32x16) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Uint32x16) AsFloat32x16() Float32x16
-// Float64x8 converts from Uint32x16 to Float64x8
-func (from Uint32x16) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Uint32x16) AsFloat64x8() Float64x8
-// Int8x64 converts from Uint32x16 to Int8x64
-func (from Uint32x16) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Uint32x16) AsInt8x64() Int8x64
-// Int16x32 converts from Uint32x16 to Int16x32
-func (from Uint32x16) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Uint32x16) AsInt16x32() Int16x32
-// Int32x16 converts from Uint32x16 to Int32x16
-func (from Uint32x16) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Uint32x16) AsInt32x16() Int32x16
-// Int64x8 converts from Uint32x16 to Int64x8
-func (from Uint32x16) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Uint32x16) AsInt64x8() Int64x8
-// Uint8x64 converts from Uint32x16 to Uint8x64
-func (from Uint32x16) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Uint32x16) AsUint8x64() Uint8x64
-// Uint16x32 converts from Uint32x16 to Uint16x32
-func (from Uint32x16) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Uint32x16) AsUint16x32() Uint16x32
-// Uint64x8 converts from Uint32x16 to Uint64x8
-func (from Uint32x16) AsUint64x8() (to Uint64x8)
+// AsUint64x8 returns a Uint64x8 with the same bit representation as x.
+func (x Uint32x16) AsUint64x8() Uint64x8
-// Float32x4 converts from Uint64x2 to Float32x4
-func (from Uint64x2) AsFloat32x4() (to Float32x4)
+// AsFloat32x4 returns a Float32x4 with the same bit representation as x.
+func (x Uint64x2) AsFloat32x4() Float32x4
-// Float64x2 converts from Uint64x2 to Float64x2
-func (from Uint64x2) AsFloat64x2() (to Float64x2)
+// AsFloat64x2 returns a Float64x2 with the same bit representation as x.
+func (x Uint64x2) AsFloat64x2() Float64x2
-// Int8x16 converts from Uint64x2 to Int8x16
-func (from Uint64x2) AsInt8x16() (to Int8x16)
+// AsInt8x16 returns an Int8x16 with the same bit representation as x.
+func (x Uint64x2) AsInt8x16() Int8x16
-// Int16x8 converts from Uint64x2 to Int16x8
-func (from Uint64x2) AsInt16x8() (to Int16x8)
+// AsInt16x8 returns an Int16x8 with the same bit representation as x.
+func (x Uint64x2) AsInt16x8() Int16x8
-// Int32x4 converts from Uint64x2 to Int32x4
-func (from Uint64x2) AsInt32x4() (to Int32x4)
+// AsInt32x4 returns an Int32x4 with the same bit representation as x.
+func (x Uint64x2) AsInt32x4() Int32x4
-// Int64x2 converts from Uint64x2 to Int64x2
-func (from Uint64x2) AsInt64x2() (to Int64x2)
+// AsInt64x2 returns an Int64x2 with the same bit representation as x.
+func (x Uint64x2) AsInt64x2() Int64x2
-// Uint8x16 converts from Uint64x2 to Uint8x16
-func (from Uint64x2) AsUint8x16() (to Uint8x16)
+// AsUint8x16 returns a Uint8x16 with the same bit representation as x.
+func (x Uint64x2) AsUint8x16() Uint8x16
-// Uint16x8 converts from Uint64x2 to Uint16x8
-func (from Uint64x2) AsUint16x8() (to Uint16x8)
+// AsUint16x8 returns a Uint16x8 with the same bit representation as x.
+func (x Uint64x2) AsUint16x8() Uint16x8
-// Uint32x4 converts from Uint64x2 to Uint32x4
-func (from Uint64x2) AsUint32x4() (to Uint32x4)
+// AsUint32x4 returns a Uint32x4 with the same bit representation as x.
+func (x Uint64x2) AsUint32x4() Uint32x4
-// Float32x8 converts from Uint64x4 to Float32x8
-func (from Uint64x4) AsFloat32x8() (to Float32x8)
+// AsFloat32x8 returns a Float32x8 with the same bit representation as x.
+func (x Uint64x4) AsFloat32x8() Float32x8
-// Float64x4 converts from Uint64x4 to Float64x4
-func (from Uint64x4) AsFloat64x4() (to Float64x4)
+// AsFloat64x4 returns a Float64x4 with the same bit representation as x.
+func (x Uint64x4) AsFloat64x4() Float64x4
-// Int8x32 converts from Uint64x4 to Int8x32
-func (from Uint64x4) AsInt8x32() (to Int8x32)
+// AsInt8x32 returns an Int8x32 with the same bit representation as x.
+func (x Uint64x4) AsInt8x32() Int8x32
-// Int16x16 converts from Uint64x4 to Int16x16
-func (from Uint64x4) AsInt16x16() (to Int16x16)
+// AsInt16x16 returns an Int16x16 with the same bit representation as x.
+func (x Uint64x4) AsInt16x16() Int16x16
-// Int32x8 converts from Uint64x4 to Int32x8
-func (from Uint64x4) AsInt32x8() (to Int32x8)
+// AsInt32x8 returns an Int32x8 with the same bit representation as x.
+func (x Uint64x4) AsInt32x8() Int32x8
-// Int64x4 converts from Uint64x4 to Int64x4
-func (from Uint64x4) AsInt64x4() (to Int64x4)
+// AsInt64x4 returns an Int64x4 with the same bit representation as x.
+func (x Uint64x4) AsInt64x4() Int64x4
-// Uint8x32 converts from Uint64x4 to Uint8x32
-func (from Uint64x4) AsUint8x32() (to Uint8x32)
+// AsUint8x32 returns a Uint8x32 with the same bit representation as x.
+func (x Uint64x4) AsUint8x32() Uint8x32
-// Uint16x16 converts from Uint64x4 to Uint16x16
-func (from Uint64x4) AsUint16x16() (to Uint16x16)
+// AsUint16x16 returns a Uint16x16 with the same bit representation as x.
+func (x Uint64x4) AsUint16x16() Uint16x16
-// Uint32x8 converts from Uint64x4 to Uint32x8
-func (from Uint64x4) AsUint32x8() (to Uint32x8)
+// AsUint32x8 returns a Uint32x8 with the same bit representation as x.
+func (x Uint64x4) AsUint32x8() Uint32x8
-// Float32x16 converts from Uint64x8 to Float32x16
-func (from Uint64x8) AsFloat32x16() (to Float32x16)
+// AsFloat32x16 returns a Float32x16 with the same bit representation as x.
+func (x Uint64x8) AsFloat32x16() Float32x16
-// Float64x8 converts from Uint64x8 to Float64x8
-func (from Uint64x8) AsFloat64x8() (to Float64x8)
+// AsFloat64x8 returns a Float64x8 with the same bit representation as x.
+func (x Uint64x8) AsFloat64x8() Float64x8
-// Int8x64 converts from Uint64x8 to Int8x64
-func (from Uint64x8) AsInt8x64() (to Int8x64)
+// AsInt8x64 returns an Int8x64 with the same bit representation as x.
+func (x Uint64x8) AsInt8x64() Int8x64
-// Int16x32 converts from Uint64x8 to Int16x32
-func (from Uint64x8) AsInt16x32() (to Int16x32)
+// AsInt16x32 returns an Int16x32 with the same bit representation as x.
+func (x Uint64x8) AsInt16x32() Int16x32
-// Int32x16 converts from Uint64x8 to Int32x16
-func (from Uint64x8) AsInt32x16() (to Int32x16)
+// AsInt32x16 returns an Int32x16 with the same bit representation as x.
+func (x Uint64x8) AsInt32x16() Int32x16
-// Int64x8 converts from Uint64x8 to Int64x8
-func (from Uint64x8) AsInt64x8() (to Int64x8)
+// AsInt64x8 returns an Int64x8 with the same bit representation as x.
+func (x Uint64x8) AsInt64x8() Int64x8
-// Uint8x64 converts from Uint64x8 to Uint8x64
-func (from Uint64x8) AsUint8x64() (to Uint8x64)
+// AsUint8x64 returns a Uint8x64 with the same bit representation as x.
+func (x Uint64x8) AsUint8x64() Uint8x64
-// Uint16x32 converts from Uint64x8 to Uint16x32
-func (from Uint64x8) AsUint16x32() (to Uint16x32)
+// AsUint16x32 returns a Uint16x32 with the same bit representation as x.
+func (x Uint64x8) AsUint16x32() Uint16x32
-// Uint32x16 converts from Uint64x8 to Uint32x16
-func (from Uint64x8) AsUint32x16() (to Uint32x16)
+// AsUint32x16 returns a Uint32x16 with the same bit representation as x.
+func (x Uint64x8) AsUint32x16() Uint32x16
-// ToInt8x16 converts from Mask8x16 to Int8x16
+// ToInt8x16 converts from Mask8x16 to Int8x16.
func (from Mask8x16) ToInt8x16() (to Int8x16)
-// asMask converts from Int8x16 to Mask8x16
+// asMask converts from Int8x16 to Mask8x16.
func (from Int8x16) asMask() (to Mask8x16)
func (x Mask8x16) And(y Mask8x16) Mask8x16
func (x Mask8x16) Or(y Mask8x16) Mask8x16
-// ToInt8x32 converts from Mask8x32 to Int8x32
+// ToInt8x32 converts from Mask8x32 to Int8x32.
func (from Mask8x32) ToInt8x32() (to Int8x32)
-// asMask converts from Int8x32 to Mask8x32
+// asMask converts from Int8x32 to Mask8x32.
func (from Int8x32) asMask() (to Mask8x32)
func (x Mask8x32) And(y Mask8x32) Mask8x32
func (x Mask8x32) Or(y Mask8x32) Mask8x32
-// ToInt8x64 converts from Mask8x64 to Int8x64
+// ToInt8x64 converts from Mask8x64 to Int8x64.
func (from Mask8x64) ToInt8x64() (to Int8x64)
-// asMask converts from Int8x64 to Mask8x64
+// asMask converts from Int8x64 to Mask8x64.
func (from Int8x64) asMask() (to Mask8x64)
func (x Mask8x64) And(y Mask8x64) Mask8x64
func (x Mask8x64) Or(y Mask8x64) Mask8x64
-// ToInt16x8 converts from Mask16x8 to Int16x8
+// ToInt16x8 converts from Mask16x8 to Int16x8.
func (from Mask16x8) ToInt16x8() (to Int16x8)
-// asMask converts from Int16x8 to Mask16x8
+// asMask converts from Int16x8 to Mask16x8.
func (from Int16x8) asMask() (to Mask16x8)
func (x Mask16x8) And(y Mask16x8) Mask16x8
func (x Mask16x8) Or(y Mask16x8) Mask16x8
-// ToInt16x16 converts from Mask16x16 to Int16x16
+// ToInt16x16 converts from Mask16x16 to Int16x16.
func (from Mask16x16) ToInt16x16() (to Int16x16)
-// asMask converts from Int16x16 to Mask16x16
+// asMask converts from Int16x16 to Mask16x16.
func (from Int16x16) asMask() (to Mask16x16)
func (x Mask16x16) And(y Mask16x16) Mask16x16
func (x Mask16x16) Or(y Mask16x16) Mask16x16
-// ToInt16x32 converts from Mask16x32 to Int16x32
+// ToInt16x32 converts from Mask16x32 to Int16x32.
func (from Mask16x32) ToInt16x32() (to Int16x32)
-// asMask converts from Int16x32 to Mask16x32
+// asMask converts from Int16x32 to Mask16x32.
func (from Int16x32) asMask() (to Mask16x32)
func (x Mask16x32) And(y Mask16x32) Mask16x32
func (x Mask16x32) Or(y Mask16x32) Mask16x32
-// ToInt32x4 converts from Mask32x4 to Int32x4
+// ToInt32x4 converts from Mask32x4 to Int32x4.
func (from Mask32x4) ToInt32x4() (to Int32x4)
-// asMask converts from Int32x4 to Mask32x4
+// asMask converts from Int32x4 to Mask32x4.
func (from Int32x4) asMask() (to Mask32x4)
func (x Mask32x4) And(y Mask32x4) Mask32x4
func (x Mask32x4) Or(y Mask32x4) Mask32x4
-// ToInt32x8 converts from Mask32x8 to Int32x8
+// ToInt32x8 converts from Mask32x8 to Int32x8.
func (from Mask32x8) ToInt32x8() (to Int32x8)
-// asMask converts from Int32x8 to Mask32x8
+// asMask converts from Int32x8 to Mask32x8.
func (from Int32x8) asMask() (to Mask32x8)
func (x Mask32x8) And(y Mask32x8) Mask32x8
func (x Mask32x8) Or(y Mask32x8) Mask32x8
-// ToInt32x16 converts from Mask32x16 to Int32x16
+// ToInt32x16 converts from Mask32x16 to Int32x16.
func (from Mask32x16) ToInt32x16() (to Int32x16)
-// asMask converts from Int32x16 to Mask32x16
+// asMask converts from Int32x16 to Mask32x16.
func (from Int32x16) asMask() (to Mask32x16)
func (x Mask32x16) And(y Mask32x16) Mask32x16
func (x Mask32x16) Or(y Mask32x16) Mask32x16
-// ToInt64x2 converts from Mask64x2 to Int64x2
+// ToInt64x2 converts from Mask64x2 to Int64x2.
func (from Mask64x2) ToInt64x2() (to Int64x2)
-// asMask converts from Int64x2 to Mask64x2
+// asMask converts from Int64x2 to Mask64x2.
func (from Int64x2) asMask() (to Mask64x2)
func (x Mask64x2) And(y Mask64x2) Mask64x2
func (x Mask64x2) Or(y Mask64x2) Mask64x2
-// ToInt64x4 converts from Mask64x4 to Int64x4
+// ToInt64x4 converts from Mask64x4 to Int64x4.
func (from Mask64x4) ToInt64x4() (to Int64x4)
-// asMask converts from Int64x4 to Mask64x4
+// asMask converts from Int64x4 to Mask64x4.
func (from Int64x4) asMask() (to Mask64x4)
func (x Mask64x4) And(y Mask64x4) Mask64x4
func (x Mask64x4) Or(y Mask64x4) Mask64x4
-// ToInt64x8 converts from Mask64x8 to Int64x8
+// ToInt64x8 converts from Mask64x8 to Int64x8.
func (from Mask64x8) ToInt64x8() (to Int64x8)
-// asMask converts from Int64x8 to Mask64x8
+// asMask converts from Int64x8 to Mask64x8.
func (from Int64x8) asMask() (to Mask64x8)
func (x Mask64x8) And(y Mask64x8) Mask64x8
diff --git a/src/simd/archsimd/ops_internal_amd64.go b/src/simd/archsimd/ops_internal_amd64.go
index 566b88d510..8eae69a7ba 100644
--- a/src/simd/archsimd/ops_internal_amd64.go
+++ b/src/simd/archsimd/ops_internal_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -382,7 +382,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x
/* permuteScalars */
// permuteScalars performs a permutation of vector x using constant indices:
-// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+//
+// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
@@ -391,7 +393,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x
func (x Int32x4) permuteScalars(indices uint8) Int32x4
// permuteScalars performs a permutation of vector x using constant indices:
-// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+//
+// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
@@ -402,7 +406,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4
/* permuteScalarsGrouped */
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
-// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
+// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
//
@@ -412,7 +418,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4
func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
-// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
+// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
//
@@ -422,7 +430,9 @@ func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8
func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
-// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
+// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
//
@@ -432,7 +442,9 @@ func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16
func (x Uint32x8) permuteScalarsGrouped(indices uint8) Uint32x8
// permuteScalarsGrouped performs a grouped permutation of vector x using constant indices:
-// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
+// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
// Each group is of size 128-bit.
//
@@ -444,7 +456,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16
/* permuteScalarsHi */
// permuteScalarsHi performs a permutation of vector x using constant indices:
-// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+//
+// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
@@ -453,7 +467,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16
func (x Int16x8) permuteScalarsHi(indices uint8) Int16x8
// permuteScalarsHi performs a permutation of vector x using constant indices:
-// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+//
+// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
@@ -522,7 +538,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32
/* permuteScalarsLo */
// permuteScalarsLo performs a permutation of vector x using constant indices:
-// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+//
+// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
@@ -531,7 +549,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32
func (x Int16x8) permuteScalarsLo(indices uint8) Int16x8
// permuteScalarsLo performs a permutation of vector x using constant indices:
-// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+//
+// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]}
+//
// Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index.
//
// indices results in better performance when it's a constant, a non-constant value will be translated into a jump table.
diff --git a/src/simd/archsimd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go
index 8d04409197..647001acce 100644
--- a/src/simd/archsimd/other_gen_amd64.go
+++ b/src/simd/archsimd/other_gen_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -7,7 +7,7 @@ package archsimd
// BroadcastInt8x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt8x16(x int8) Int8x16 {
var z Int8x16
return z.SetElem(0, x).Broadcast128()
@@ -16,7 +16,7 @@ func BroadcastInt8x16(x int8) Int8x16 {
// BroadcastInt16x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt16x8(x int16) Int16x8 {
var z Int16x8
return z.SetElem(0, x).Broadcast128()
@@ -25,7 +25,7 @@ func BroadcastInt16x8(x int16) Int16x8 {
// BroadcastInt32x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt32x4(x int32) Int32x4 {
var z Int32x4
return z.SetElem(0, x).Broadcast128()
@@ -34,7 +34,7 @@ func BroadcastInt32x4(x int32) Int32x4 {
// BroadcastInt64x2 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt64x2(x int64) Int64x2 {
var z Int64x2
return z.SetElem(0, x).Broadcast128()
@@ -43,7 +43,7 @@ func BroadcastInt64x2(x int64) Int64x2 {
// BroadcastUint8x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint8x16(x uint8) Uint8x16 {
var z Uint8x16
return z.SetElem(0, x).Broadcast128()
@@ -52,7 +52,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 {
// BroadcastUint16x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint16x8(x uint16) Uint16x8 {
var z Uint16x8
return z.SetElem(0, x).Broadcast128()
@@ -61,7 +61,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 {
// BroadcastUint32x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint32x4(x uint32) Uint32x4 {
var z Uint32x4
return z.SetElem(0, x).Broadcast128()
@@ -70,7 +70,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 {
// BroadcastUint64x2 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint64x2(x uint64) Uint64x2 {
var z Uint64x2
return z.SetElem(0, x).Broadcast128()
@@ -79,7 +79,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 {
// BroadcastFloat32x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastFloat32x4(x float32) Float32x4 {
var z Float32x4
return z.SetElem(0, x).Broadcast128()
@@ -88,7 +88,7 @@ func BroadcastFloat32x4(x float32) Float32x4 {
// BroadcastFloat64x2 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastFloat64x2(x float64) Float64x2 {
var z Float64x2
return z.SetElem(0, x).Broadcast128()
@@ -97,7 +97,7 @@ func BroadcastFloat64x2(x float64) Float64x2 {
// BroadcastInt8x32 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt8x32(x int8) Int8x32 {
var z Int8x16
return z.SetElem(0, x).Broadcast256()
@@ -106,7 +106,7 @@ func BroadcastInt8x32(x int8) Int8x32 {
// BroadcastInt16x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt16x16(x int16) Int16x16 {
var z Int16x8
return z.SetElem(0, x).Broadcast256()
@@ -115,7 +115,7 @@ func BroadcastInt16x16(x int16) Int16x16 {
// BroadcastInt32x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt32x8(x int32) Int32x8 {
var z Int32x4
return z.SetElem(0, x).Broadcast256()
@@ -124,7 +124,7 @@ func BroadcastInt32x8(x int32) Int32x8 {
// BroadcastInt64x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastInt64x4(x int64) Int64x4 {
var z Int64x2
return z.SetElem(0, x).Broadcast256()
@@ -133,7 +133,7 @@ func BroadcastInt64x4(x int64) Int64x4 {
// BroadcastUint8x32 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint8x32(x uint8) Uint8x32 {
var z Uint8x16
return z.SetElem(0, x).Broadcast256()
@@ -142,7 +142,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 {
// BroadcastUint16x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint16x16(x uint16) Uint16x16 {
var z Uint16x8
return z.SetElem(0, x).Broadcast256()
@@ -151,7 +151,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 {
// BroadcastUint32x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint32x8(x uint32) Uint32x8 {
var z Uint32x4
return z.SetElem(0, x).Broadcast256()
@@ -160,7 +160,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 {
// BroadcastUint64x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastUint64x4(x uint64) Uint64x4 {
var z Uint64x2
return z.SetElem(0, x).Broadcast256()
@@ -169,7 +169,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 {
// BroadcastFloat32x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastFloat32x8(x float32) Float32x8 {
var z Float32x4
return z.SetElem(0, x).Broadcast256()
@@ -178,7 +178,7 @@ func BroadcastFloat32x8(x float32) Float32x8 {
// BroadcastFloat64x4 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func BroadcastFloat64x4(x float64) Float64x4 {
var z Float64x2
return z.SetElem(0, x).Broadcast256()
@@ -187,7 +187,7 @@ func BroadcastFloat64x4(x float64) Float64x4 {
// BroadcastInt8x64 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512BW
+// Emulated, CPU Feature: AVX512BW
func BroadcastInt8x64(x int8) Int8x64 {
var z Int8x16
return z.SetElem(0, x).Broadcast512()
@@ -196,7 +196,7 @@ func BroadcastInt8x64(x int8) Int8x64 {
// BroadcastInt16x32 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512BW
+// Emulated, CPU Feature: AVX512BW
func BroadcastInt16x32(x int16) Int16x32 {
var z Int16x8
return z.SetElem(0, x).Broadcast512()
@@ -205,7 +205,7 @@ func BroadcastInt16x32(x int16) Int16x32 {
// BroadcastInt32x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastInt32x16(x int32) Int32x16 {
var z Int32x4
return z.SetElem(0, x).Broadcast512()
@@ -214,7 +214,7 @@ func BroadcastInt32x16(x int32) Int32x16 {
// BroadcastInt64x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastInt64x8(x int64) Int64x8 {
var z Int64x2
return z.SetElem(0, x).Broadcast512()
@@ -223,7 +223,7 @@ func BroadcastInt64x8(x int64) Int64x8 {
// BroadcastUint8x64 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512BW
+// Emulated, CPU Feature: AVX512BW
func BroadcastUint8x64(x uint8) Uint8x64 {
var z Uint8x16
return z.SetElem(0, x).Broadcast512()
@@ -232,7 +232,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 {
// BroadcastUint16x32 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512BW
+// Emulated, CPU Feature: AVX512BW
func BroadcastUint16x32(x uint16) Uint16x32 {
var z Uint16x8
return z.SetElem(0, x).Broadcast512()
@@ -241,7 +241,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 {
// BroadcastUint32x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastUint32x16(x uint32) Uint32x16 {
var z Uint32x4
return z.SetElem(0, x).Broadcast512()
@@ -250,7 +250,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 {
// BroadcastUint64x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastUint64x8(x uint64) Uint64x8 {
var z Uint64x2
return z.SetElem(0, x).Broadcast512()
@@ -259,7 +259,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 {
// BroadcastFloat32x16 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastFloat32x16(x float32) Float32x16 {
var z Float32x4
return z.SetElem(0, x).Broadcast512()
@@ -268,7 +268,7 @@ func BroadcastFloat32x16(x float32) Float32x16 {
// BroadcastFloat64x8 returns a vector with the input
// x assigned to all elements of the output.
//
-// Emulated, CPU Feature AVX512F
+// Emulated, CPU Feature: AVX512F
func BroadcastFloat64x8(x float64) Float64x8 {
var z Float64x2
return z.SetElem(0, x).Broadcast512()
@@ -334,378 +334,378 @@ func (from Int64x8) ToMask() (to Mask64x8) {
return from.NotEqual(Int64x8{})
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int8x16) Not() Int8x16 {
return x.Xor(x.Equal(x).ToInt8x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int16x8) Not() Int16x8 {
return x.Xor(x.Equal(x).ToInt16x8())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int32x4) Not() Int32x4 {
return x.Xor(x.Equal(x).ToInt32x4())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Int64x2) Not() Int64x2 {
return x.Xor(x.Equal(x).ToInt64x2())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int8x32) Not() Int8x32 {
return x.Xor(x.Equal(x).ToInt8x32())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int16x16) Not() Int16x16 {
return x.Xor(x.Equal(x).ToInt16x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int32x8) Not() Int32x8 {
return x.Xor(x.Equal(x).ToInt32x8())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Int64x4) Not() Int64x4 {
return x.Xor(x.Equal(x).ToInt64x4())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Int8x64) Not() Int8x64 {
return x.Xor(x.Equal(x).ToInt8x64())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Int16x32) Not() Int16x32 {
return x.Xor(x.Equal(x).ToInt16x32())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Int32x16) Not() Int32x16 {
return x.Xor(x.Equal(x).ToInt32x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Int64x8) Not() Int64x8 {
return x.Xor(x.Equal(x).ToInt64x8())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint8x16) Not() Uint8x16 {
return x.Xor(x.Equal(x).ToInt8x16().AsUint8x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint16x8) Not() Uint16x8 {
return x.Xor(x.Equal(x).ToInt16x8().AsUint16x8())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint32x4) Not() Uint32x4 {
return x.Xor(x.Equal(x).ToInt32x4().AsUint32x4())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX
+// Emulated, CPU Feature: AVX
func (x Uint64x2) Not() Uint64x2 {
return x.Xor(x.Equal(x).ToInt64x2().AsUint64x2())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint8x32) Not() Uint8x32 {
return x.Xor(x.Equal(x).ToInt8x32().AsUint8x32())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint16x16) Not() Uint16x16 {
return x.Xor(x.Equal(x).ToInt16x16().AsUint16x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint32x8) Not() Uint32x8 {
return x.Xor(x.Equal(x).ToInt32x8().AsUint32x8())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX2
+// Emulated, CPU Feature: AVX2
func (x Uint64x4) Not() Uint64x4 {
return x.Xor(x.Equal(x).ToInt64x4().AsUint64x4())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Uint8x64) Not() Uint8x64 {
return x.Xor(x.Equal(x).ToInt8x64().AsUint8x64())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Uint16x32) Not() Uint16x32 {
return x.Xor(x.Equal(x).ToInt16x32().AsUint16x32())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Uint32x16) Not() Uint32x16 {
return x.Xor(x.Equal(x).ToInt32x16().AsUint32x16())
}
-// Not returns the bitwise complement of x
+// Not returns the bitwise complement of x.
//
-// Emulated, CPU Feature AVX512
+// Emulated, CPU Feature: AVX512
func (x Uint64x8) Not() Uint64x8 {
return x.Xor(x.Equal(x).ToInt64x8().AsUint64x8())
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int8x16) String() string {
var s [16]int8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int16x8) String() string {
var s [8]int16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int32x4) String() string {
var s [4]int32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int64x2) String() string {
var s [2]int64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint8x16) String() string {
var s [16]uint8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint16x8) String() string {
var s [8]uint16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint32x4) String() string {
var s [4]uint32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint64x2) String() string {
var s [2]uint64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float32x4) String() string {
var s [4]float32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float64x2) String() string {
var s [2]float64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int8x32) String() string {
var s [32]int8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int16x16) String() string {
var s [16]int16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int32x8) String() string {
var s [8]int32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int64x4) String() string {
var s [4]int64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint8x32) String() string {
var s [32]uint8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint16x16) String() string {
var s [16]uint16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint32x8) String() string {
var s [8]uint32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint64x4) String() string {
var s [4]uint64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float32x8) String() string {
var s [8]float32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float64x4) String() string {
var s [4]float64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int8x64) String() string {
var s [64]int8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int16x32) String() string {
var s [32]int16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int32x16) String() string {
var s [16]int32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Int64x8) String() string {
var s [8]int64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint8x64) String() string {
var s [64]uint8
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint16x32) String() string {
var s [32]uint16
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint32x16) String() string {
var s [16]uint32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Uint64x8) String() string {
var s [8]uint64
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float32x16) String() string {
var s [16]float32
x.Store(&s)
return sliceToString(s[:])
}
-// String returns a string representation of SIMD vector x
+// String returns a string representation of SIMD vector x.
func (x Float64x8) String() string {
var s [8]float64
x.Store(&s)
diff --git a/src/simd/archsimd/shuffles_amd64.go b/src/simd/archsimd/shuffles_amd64.go
index 2bbd89c725..355634fcae 100644
--- a/src/simd/archsimd/shuffles_amd64.go
+++ b/src/simd/archsimd/shuffles_amd64.go
@@ -54,7 +54,10 @@ const (
// requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81}
+//
+// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81})
+//
+// returns {4,8,25,81}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -133,7 +136,10 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 {
// it requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81}
+//
+// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81})
+//
+// returns {4,8,25,81}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -205,7 +211,10 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 {
// it requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81}
+//
+// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81})
+//
+// returns {4,8,25,81}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -278,9 +287,10 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 {
// it requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
//
-// returns {4,8,25,81,64,128,169,289}
+// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
+//
+// returns {4,8,25,81,64,128,169,289}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -353,9 +363,10 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 {
// it requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
//
-// returns {4,8,25,81,64,128,169,289}
+// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
+//
+// returns {4,8,25,81,64,128,169,289}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -428,9 +439,10 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 {
// it requires two. a is the source index of the least element in the
// output, and b, c, and d are the indices of the 2nd, 3rd, and 4th
// elements in the output. For example,
-// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
//
-// returns {4,8,25,81,64,128,169,289}
+// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289})
+//
+// returns {4,8,25,81,64,128,169,289}.
//
// If the selectors are not constant this will translate to a function
// call.
@@ -1080,7 +1092,7 @@ func (x Uint32x16) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x16 {
// PermuteScalarsHi performs a permutation of vector x using the supplied indices:
//
-// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
+// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
@@ -1093,7 +1105,7 @@ func (x Int16x8) PermuteScalarsHi(a, b, c, d uint8) Int16x8 {
// PermuteScalarsHi performs a permutation of vector x using the supplied indices:
//
-// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
+// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]}
//
// Parameters a,b,c,d should have values between 0 and 3.
// If a through d are constants, then an instruction will be inlined, otherwise
@@ -1276,7 +1288,8 @@ func (x Uint16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x32 {
//
// A carryless multiplication uses bitwise XOR instead of
// add-with-carry, for example (in base two):
-// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
+//
+// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// This also models multiplication of polynomials with coefficients
// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
@@ -1300,7 +1313,8 @@ func (x Uint64x2) CarrylessMultiply(a, b uint8, y Uint64x2) Uint64x2 {
//
// A carryless multiplication uses bitwise XOR instead of
// add-with-carry, for example (in base two):
-// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
+//
+// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// This also models multiplication of polynomials with coefficients
// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
@@ -1324,7 +1338,8 @@ func (x Uint64x4) CarrylessMultiplyGrouped(a, b uint8, y Uint64x4) Uint64x4 {
//
// A carryless multiplication uses bitwise XOR instead of
// add-with-carry, for example (in base two):
-// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
+//
+// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101
//
// This also models multiplication of polynomials with coefficients
// from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 =
diff --git a/src/simd/archsimd/slice_gen_amd64.go b/src/simd/archsimd/slice_gen_amd64.go
index c03e28206d..9e34f9ca56 100644
--- a/src/simd/archsimd/slice_gen_amd64.go
+++ b/src/simd/archsimd/slice_gen_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -6,302 +6,302 @@ package archsimd
import "unsafe"
-// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s
+// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s.
func LoadInt8x16Slice(s []int8) Int8x16 {
return LoadInt8x16((*[16]int8)(s))
}
-// StoreSlice stores x into a slice of at least 16 int8s
+// StoreSlice stores x into a slice of at least 16 int8s.
func (x Int8x16) StoreSlice(s []int8) {
x.Store((*[16]int8)(s))
}
-// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s
+// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s.
func LoadInt16x8Slice(s []int16) Int16x8 {
return LoadInt16x8((*[8]int16)(s))
}
-// StoreSlice stores x into a slice of at least 8 int16s
+// StoreSlice stores x into a slice of at least 8 int16s.
func (x Int16x8) StoreSlice(s []int16) {
x.Store((*[8]int16)(s))
}
-// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s
+// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s.
func LoadInt32x4Slice(s []int32) Int32x4 {
return LoadInt32x4((*[4]int32)(s))
}
-// StoreSlice stores x into a slice of at least 4 int32s
+// StoreSlice stores x into a slice of at least 4 int32s.
func (x Int32x4) StoreSlice(s []int32) {
x.Store((*[4]int32)(s))
}
-// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s
+// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s.
func LoadInt64x2Slice(s []int64) Int64x2 {
return LoadInt64x2((*[2]int64)(s))
}
-// StoreSlice stores x into a slice of at least 2 int64s
+// StoreSlice stores x into a slice of at least 2 int64s.
func (x Int64x2) StoreSlice(s []int64) {
x.Store((*[2]int64)(s))
}
-// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s
+// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s.
func LoadUint8x16Slice(s []uint8) Uint8x16 {
return LoadUint8x16((*[16]uint8)(s))
}
-// StoreSlice stores x into a slice of at least 16 uint8s
+// StoreSlice stores x into a slice of at least 16 uint8s.
func (x Uint8x16) StoreSlice(s []uint8) {
x.Store((*[16]uint8)(s))
}
-// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s
+// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s.
func LoadUint16x8Slice(s []uint16) Uint16x8 {
return LoadUint16x8((*[8]uint16)(s))
}
-// StoreSlice stores x into a slice of at least 8 uint16s
+// StoreSlice stores x into a slice of at least 8 uint16s.
func (x Uint16x8) StoreSlice(s []uint16) {
x.Store((*[8]uint16)(s))
}
-// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s
+// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s.
func LoadUint32x4Slice(s []uint32) Uint32x4 {
return LoadUint32x4((*[4]uint32)(s))
}
-// StoreSlice stores x into a slice of at least 4 uint32s
+// StoreSlice stores x into a slice of at least 4 uint32s.
func (x Uint32x4) StoreSlice(s []uint32) {
x.Store((*[4]uint32)(s))
}
-// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s
+// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s.
func LoadUint64x2Slice(s []uint64) Uint64x2 {
return LoadUint64x2((*[2]uint64)(s))
}
-// StoreSlice stores x into a slice of at least 2 uint64s
+// StoreSlice stores x into a slice of at least 2 uint64s.
func (x Uint64x2) StoreSlice(s []uint64) {
x.Store((*[2]uint64)(s))
}
-// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s
+// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s.
func LoadFloat32x4Slice(s []float32) Float32x4 {
return LoadFloat32x4((*[4]float32)(s))
}
-// StoreSlice stores x into a slice of at least 4 float32s
+// StoreSlice stores x into a slice of at least 4 float32s.
func (x Float32x4) StoreSlice(s []float32) {
x.Store((*[4]float32)(s))
}
-// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s
+// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s.
func LoadFloat64x2Slice(s []float64) Float64x2 {
return LoadFloat64x2((*[2]float64)(s))
}
-// StoreSlice stores x into a slice of at least 2 float64s
+// StoreSlice stores x into a slice of at least 2 float64s.
func (x Float64x2) StoreSlice(s []float64) {
x.Store((*[2]float64)(s))
}
-// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s
+// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s.
func LoadInt8x32Slice(s []int8) Int8x32 {
return LoadInt8x32((*[32]int8)(s))
}
-// StoreSlice stores x into a slice of at least 32 int8s
+// StoreSlice stores x into a slice of at least 32 int8s.
func (x Int8x32) StoreSlice(s []int8) {
x.Store((*[32]int8)(s))
}
-// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s
+// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s.
func LoadInt16x16Slice(s []int16) Int16x16 {
return LoadInt16x16((*[16]int16)(s))
}
-// StoreSlice stores x into a slice of at least 16 int16s
+// StoreSlice stores x into a slice of at least 16 int16s.
func (x Int16x16) StoreSlice(s []int16) {
x.Store((*[16]int16)(s))
}
-// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s
+// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s.
func LoadInt32x8Slice(s []int32) Int32x8 {
return LoadInt32x8((*[8]int32)(s))
}
-// StoreSlice stores x into a slice of at least 8 int32s
+// StoreSlice stores x into a slice of at least 8 int32s.
func (x Int32x8) StoreSlice(s []int32) {
x.Store((*[8]int32)(s))
}
-// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s
+// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s.
func LoadInt64x4Slice(s []int64) Int64x4 {
return LoadInt64x4((*[4]int64)(s))
}
-// StoreSlice stores x into a slice of at least 4 int64s
+// StoreSlice stores x into a slice of at least 4 int64s.
func (x Int64x4) StoreSlice(s []int64) {
x.Store((*[4]int64)(s))
}
-// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s
+// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s.
func LoadUint8x32Slice(s []uint8) Uint8x32 {
return LoadUint8x32((*[32]uint8)(s))
}
-// StoreSlice stores x into a slice of at least 32 uint8s
+// StoreSlice stores x into a slice of at least 32 uint8s.
func (x Uint8x32) StoreSlice(s []uint8) {
x.Store((*[32]uint8)(s))
}
-// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s
+// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s.
func LoadUint16x16Slice(s []uint16) Uint16x16 {
return LoadUint16x16((*[16]uint16)(s))
}
-// StoreSlice stores x into a slice of at least 16 uint16s
+// StoreSlice stores x into a slice of at least 16 uint16s.
func (x Uint16x16) StoreSlice(s []uint16) {
x.Store((*[16]uint16)(s))
}
-// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s
+// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s.
func LoadUint32x8Slice(s []uint32) Uint32x8 {
return LoadUint32x8((*[8]uint32)(s))
}
-// StoreSlice stores x into a slice of at least 8 uint32s
+// StoreSlice stores x into a slice of at least 8 uint32s.
func (x Uint32x8) StoreSlice(s []uint32) {
x.Store((*[8]uint32)(s))
}
-// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s
+// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s.
func LoadUint64x4Slice(s []uint64) Uint64x4 {
return LoadUint64x4((*[4]uint64)(s))
}
-// StoreSlice stores x into a slice of at least 4 uint64s
+// StoreSlice stores x into a slice of at least 4 uint64s.
func (x Uint64x4) StoreSlice(s []uint64) {
x.Store((*[4]uint64)(s))
}
-// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s
+// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s.
func LoadFloat32x8Slice(s []float32) Float32x8 {
return LoadFloat32x8((*[8]float32)(s))
}
-// StoreSlice stores x into a slice of at least 8 float32s
+// StoreSlice stores x into a slice of at least 8 float32s.
func (x Float32x8) StoreSlice(s []float32) {
x.Store((*[8]float32)(s))
}
-// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s
+// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s.
func LoadFloat64x4Slice(s []float64) Float64x4 {
return LoadFloat64x4((*[4]float64)(s))
}
-// StoreSlice stores x into a slice of at least 4 float64s
+// StoreSlice stores x into a slice of at least 4 float64s.
func (x Float64x4) StoreSlice(s []float64) {
x.Store((*[4]float64)(s))
}
-// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s
+// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s.
func LoadInt8x64Slice(s []int8) Int8x64 {
return LoadInt8x64((*[64]int8)(s))
}
-// StoreSlice stores x into a slice of at least 64 int8s
+// StoreSlice stores x into a slice of at least 64 int8s.
func (x Int8x64) StoreSlice(s []int8) {
x.Store((*[64]int8)(s))
}
-// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s
+// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s.
func LoadInt16x32Slice(s []int16) Int16x32 {
return LoadInt16x32((*[32]int16)(s))
}
-// StoreSlice stores x into a slice of at least 32 int16s
+// StoreSlice stores x into a slice of at least 32 int16s.
func (x Int16x32) StoreSlice(s []int16) {
x.Store((*[32]int16)(s))
}
-// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s
+// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s.
func LoadInt32x16Slice(s []int32) Int32x16 {
return LoadInt32x16((*[16]int32)(s))
}
-// StoreSlice stores x into a slice of at least 16 int32s
+// StoreSlice stores x into a slice of at least 16 int32s.
func (x Int32x16) StoreSlice(s []int32) {
x.Store((*[16]int32)(s))
}
-// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s
+// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s.
func LoadInt64x8Slice(s []int64) Int64x8 {
return LoadInt64x8((*[8]int64)(s))
}
-// StoreSlice stores x into a slice of at least 8 int64s
+// StoreSlice stores x into a slice of at least 8 int64s.
func (x Int64x8) StoreSlice(s []int64) {
x.Store((*[8]int64)(s))
}
-// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s
+// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s.
func LoadUint8x64Slice(s []uint8) Uint8x64 {
return LoadUint8x64((*[64]uint8)(s))
}
-// StoreSlice stores x into a slice of at least 64 uint8s
+// StoreSlice stores x into a slice of at least 64 uint8s.
func (x Uint8x64) StoreSlice(s []uint8) {
x.Store((*[64]uint8)(s))
}
-// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s
+// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s.
func LoadUint16x32Slice(s []uint16) Uint16x32 {
return LoadUint16x32((*[32]uint16)(s))
}
-// StoreSlice stores x into a slice of at least 32 uint16s
+// StoreSlice stores x into a slice of at least 32 uint16s.
func (x Uint16x32) StoreSlice(s []uint16) {
x.Store((*[32]uint16)(s))
}
-// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s
+// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s.
func LoadUint32x16Slice(s []uint32) Uint32x16 {
return LoadUint32x16((*[16]uint32)(s))
}
-// StoreSlice stores x into a slice of at least 16 uint32s
+// StoreSlice stores x into a slice of at least 16 uint32s.
func (x Uint32x16) StoreSlice(s []uint32) {
x.Store((*[16]uint32)(s))
}
-// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s
+// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s.
func LoadUint64x8Slice(s []uint64) Uint64x8 {
return LoadUint64x8((*[8]uint64)(s))
}
-// StoreSlice stores x into a slice of at least 8 uint64s
+// StoreSlice stores x into a slice of at least 8 uint64s.
func (x Uint64x8) StoreSlice(s []uint64) {
x.Store((*[8]uint64)(s))
}
-// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s
+// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s.
func LoadFloat32x16Slice(s []float32) Float32x16 {
return LoadFloat32x16((*[16]float32)(s))
}
-// StoreSlice stores x into a slice of at least 16 float32s
+// StoreSlice stores x into a slice of at least 16 float32s.
func (x Float32x16) StoreSlice(s []float32) {
x.Store((*[16]float32)(s))
}
-// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s
+// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s.
func LoadFloat64x8Slice(s []float64) Float64x8 {
return LoadFloat64x8((*[8]float64)(s))
}
-// StoreSlice stores x into a slice of at least 8 float64s
+// StoreSlice stores x into a slice of at least 8 float64s.
func (x Float64x8) StoreSlice(s []float64) {
x.Store((*[8]float64)(s))
}
diff --git a/src/simd/archsimd/types_amd64.go b/src/simd/archsimd/types_amd64.go
index 556383b380..3d0a49dc09 100644
--- a/src/simd/archsimd/types_amd64.go
+++ b/src/simd/archsimd/types_amd64.go
@@ -1,4 +1,4 @@
-// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT.
//go:build goexperiment.simd
@@ -9,27 +9,27 @@ type v128 struct {
_128 [0]func() // uncomparable
}
-// Float32x4 is a 128-bit SIMD vector of 4 float32
+// Float32x4 is a 128-bit SIMD vector of 4 float32s.
type Float32x4 struct {
float32x4 v128
vals [4]float32
}
-// Len returns the number of elements in a Float32x4
+// Len returns the number of elements in a Float32x4.
func (x Float32x4) Len() int { return 4 }
-// LoadFloat32x4 loads a Float32x4 from an array
+// LoadFloat32x4 loads a Float32x4 from an array.
//
//go:noescape
func LoadFloat32x4(y *[4]float32) Float32x4
-// Store stores a Float32x4 to an array
+// Store stores a Float32x4 to an array.
//
//go:noescape
func (x Float32x4) Store(y *[4]float32)
// LoadMaskedFloat32x4 loads a Float32x4 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
@@ -37,34 +37,34 @@ func (x Float32x4) Store(y *[4]float32)
func LoadMaskedFloat32x4(y *[4]float32, mask Mask32x4) Float32x4
// StoreMasked stores a Float32x4 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Float32x4) StoreMasked(y *[4]float32, mask Mask32x4)
-// Float64x2 is a 128-bit SIMD vector of 2 float64
+// Float64x2 is a 128-bit SIMD vector of 2 float64s.
type Float64x2 struct {
float64x2 v128
vals [2]float64
}
-// Len returns the number of elements in a Float64x2
+// Len returns the number of elements in a Float64x2.
func (x Float64x2) Len() int { return 2 }
-// LoadFloat64x2 loads a Float64x2 from an array
+// LoadFloat64x2 loads a Float64x2 from an array.
//
//go:noescape
func LoadFloat64x2(y *[2]float64) Float64x2
-// Store stores a Float64x2 to an array
+// Store stores a Float64x2 to an array.
//
//go:noescape
func (x Float64x2) Store(y *[2]float64)
// LoadMaskedFloat64x2 loads a Float64x2 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
@@ -72,180 +72,180 @@ func (x Float64x2) Store(y *[2]float64)
func LoadMaskedFloat64x2(y *[2]float64, mask Mask64x2) Float64x2
// StoreMasked stores a Float64x2 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Float64x2) StoreMasked(y *[2]float64, mask Mask64x2)
-// Int8x16 is a 128-bit SIMD vector of 16 int8
+// Int8x16 is a 128-bit SIMD vector of 16 int8s.
type Int8x16 struct {
int8x16 v128
vals [16]int8
}
-// Len returns the number of elements in a Int8x16
+// Len returns the number of elements in an Int8x16.
func (x Int8x16) Len() int { return 16 }
-// LoadInt8x16 loads a Int8x16 from an array
+// LoadInt8x16 loads an Int8x16 from an array.
//
//go:noescape
func LoadInt8x16(y *[16]int8) Int8x16
-// Store stores a Int8x16 to an array
+// Store stores an Int8x16 to an array.
//
//go:noescape
func (x Int8x16) Store(y *[16]int8)
-// Int16x8 is a 128-bit SIMD vector of 8 int16
+// Int16x8 is a 128-bit SIMD vector of 8 int16s.
type Int16x8 struct {
int16x8 v128
vals [8]int16
}
-// Len returns the number of elements in a Int16x8
+// Len returns the number of elements in an Int16x8.
func (x Int16x8) Len() int { return 8 }
-// LoadInt16x8 loads a Int16x8 from an array
+// LoadInt16x8 loads an Int16x8 from an array.
//
//go:noescape
func LoadInt16x8(y *[8]int16) Int16x8
-// Store stores a Int16x8 to an array
+// Store stores an Int16x8 to an array.
//
//go:noescape
func (x Int16x8) Store(y *[8]int16)
-// Int32x4 is a 128-bit SIMD vector of 4 int32
+// Int32x4 is a 128-bit SIMD vector of 4 int32s.
type Int32x4 struct {
int32x4 v128
vals [4]int32
}
-// Len returns the number of elements in a Int32x4
+// Len returns the number of elements in an Int32x4.
func (x Int32x4) Len() int { return 4 }
-// LoadInt32x4 loads a Int32x4 from an array
+// LoadInt32x4 loads an Int32x4 from an array.
//
//go:noescape
func LoadInt32x4(y *[4]int32) Int32x4
-// Store stores a Int32x4 to an array
+// Store stores an Int32x4 to an array.
//
//go:noescape
func (x Int32x4) Store(y *[4]int32)
-// LoadMaskedInt32x4 loads a Int32x4 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt32x4 loads an Int32x4 from an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func LoadMaskedInt32x4(y *[4]int32, mask Mask32x4) Int32x4
-// StoreMasked stores a Int32x4 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int32x4 to an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Int32x4) StoreMasked(y *[4]int32, mask Mask32x4)
-// Int64x2 is a 128-bit SIMD vector of 2 int64
+// Int64x2 is a 128-bit SIMD vector of 2 int64s.
type Int64x2 struct {
int64x2 v128
vals [2]int64
}
-// Len returns the number of elements in a Int64x2
+// Len returns the number of elements in an Int64x2.
func (x Int64x2) Len() int { return 2 }
-// LoadInt64x2 loads a Int64x2 from an array
+// LoadInt64x2 loads an Int64x2 from an array.
//
//go:noescape
func LoadInt64x2(y *[2]int64) Int64x2
-// Store stores a Int64x2 to an array
+// Store stores an Int64x2 to an array.
//
//go:noescape
func (x Int64x2) Store(y *[2]int64)
-// LoadMaskedInt64x2 loads a Int64x2 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt64x2 loads an Int64x2 from an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func LoadMaskedInt64x2(y *[2]int64, mask Mask64x2) Int64x2
-// StoreMasked stores a Int64x2 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int64x2 to an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Int64x2) StoreMasked(y *[2]int64, mask Mask64x2)
-// Uint8x16 is a 128-bit SIMD vector of 16 uint8
+// Uint8x16 is a 128-bit SIMD vector of 16 uint8s.
type Uint8x16 struct {
uint8x16 v128
vals [16]uint8
}
-// Len returns the number of elements in a Uint8x16
+// Len returns the number of elements in a Uint8x16.
func (x Uint8x16) Len() int { return 16 }
-// LoadUint8x16 loads a Uint8x16 from an array
+// LoadUint8x16 loads a Uint8x16 from an array.
//
//go:noescape
func LoadUint8x16(y *[16]uint8) Uint8x16
-// Store stores a Uint8x16 to an array
+// Store stores a Uint8x16 to an array.
//
//go:noescape
func (x Uint8x16) Store(y *[16]uint8)
-// Uint16x8 is a 128-bit SIMD vector of 8 uint16
+// Uint16x8 is a 128-bit SIMD vector of 8 uint16s.
type Uint16x8 struct {
uint16x8 v128
vals [8]uint16
}
-// Len returns the number of elements in a Uint16x8
+// Len returns the number of elements in a Uint16x8.
func (x Uint16x8) Len() int { return 8 }
-// LoadUint16x8 loads a Uint16x8 from an array
+// LoadUint16x8 loads a Uint16x8 from an array.
//
//go:noescape
func LoadUint16x8(y *[8]uint16) Uint16x8
-// Store stores a Uint16x8 to an array
+// Store stores a Uint16x8 to an array.
//
//go:noescape
func (x Uint16x8) Store(y *[8]uint16)
-// Uint32x4 is a 128-bit SIMD vector of 4 uint32
+// Uint32x4 is a 128-bit SIMD vector of 4 uint32s.
type Uint32x4 struct {
uint32x4 v128
vals [4]uint32
}
-// Len returns the number of elements in a Uint32x4
+// Len returns the number of elements in a Uint32x4.
func (x Uint32x4) Len() int { return 4 }
-// LoadUint32x4 loads a Uint32x4 from an array
+// LoadUint32x4 loads a Uint32x4 from an array.
//
//go:noescape
func LoadUint32x4(y *[4]uint32) Uint32x4
-// Store stores a Uint32x4 to an array
+// Store stores a Uint32x4 to an array.
//
//go:noescape
func (x Uint32x4) Store(y *[4]uint32)
// LoadMaskedUint32x4 loads a Uint32x4 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
@@ -253,34 +253,34 @@ func (x Uint32x4) Store(y *[4]uint32)
func LoadMaskedUint32x4(y *[4]uint32, mask Mask32x4) Uint32x4
// StoreMasked stores a Uint32x4 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Uint32x4) StoreMasked(y *[4]uint32, mask Mask32x4)
-// Uint64x2 is a 128-bit SIMD vector of 2 uint64
+// Uint64x2 is a 128-bit SIMD vector of 2 uint64s.
type Uint64x2 struct {
uint64x2 v128
vals [2]uint64
}
-// Len returns the number of elements in a Uint64x2
+// Len returns the number of elements in a Uint64x2.
func (x Uint64x2) Len() int { return 2 }
-// LoadUint64x2 loads a Uint64x2 from an array
+// LoadUint64x2 loads a Uint64x2 from an array.
//
//go:noescape
func LoadUint64x2(y *[2]uint64) Uint64x2
-// Store stores a Uint64x2 to an array
+// Store stores a Uint64x2 to an array.
//
//go:noescape
func (x Uint64x2) Store(y *[2]uint64)
// LoadMaskedUint64x2 loads a Uint64x2 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
@@ -288,14 +288,14 @@ func (x Uint64x2) Store(y *[2]uint64)
func LoadMaskedUint64x2(y *[2]uint64, mask Mask64x2) Uint64x2
// StoreMasked stores a Uint64x2 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Uint64x2) StoreMasked(y *[2]uint64, mask Mask64x2)
-// Mask8x16 is a 128-bit SIMD vector of 16 int8
+// Mask8x16 is a mask for a SIMD vector of 16 8-bit elements.
type Mask8x16 struct {
int8x16 v128
vals [16]int8
@@ -308,10 +308,10 @@ func Mask8x16FromBits(y uint16) Mask8x16
// ToBits constructs a bitmap from a Mask8x16, where 1 means set for the indexed element, 0 means unset.
//
-// Asm: KMOVB, CPU Features: AVX512
+// Asm: VPMOVMSKB, CPU Features: AVX
func (x Mask8x16) ToBits() uint16
-// Mask16x8 is a 128-bit SIMD vector of 8 int16
+// Mask16x8 is a mask for a SIMD vector of 8 16-bit elements.
type Mask16x8 struct {
int16x8 v128
vals [8]int16
@@ -327,7 +327,7 @@ func Mask16x8FromBits(y uint8) Mask16x8
// Asm: KMOVW, CPU Features: AVX512
func (x Mask16x8) ToBits() uint8
-// Mask32x4 is a 128-bit SIMD vector of 4 int32
+// Mask32x4 is a mask for a SIMD vector of 4 32-bit elements.
type Mask32x4 struct {
int32x4 v128
vals [4]int32
@@ -342,10 +342,10 @@ func Mask32x4FromBits(y uint8) Mask32x4
// ToBits constructs a bitmap from a Mask32x4, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
-// Asm: KMOVD, CPU Features: AVX512
+// Asm: VMOVMSKPS, CPU Features: AVX
func (x Mask32x4) ToBits() uint8
-// Mask64x2 is a 128-bit SIMD vector of 2 int64
+// Mask64x2 is a mask for a SIMD vector of 2 64-bit elements.
type Mask64x2 struct {
int64x2 v128
vals [2]int64
@@ -360,7 +360,7 @@ func Mask64x2FromBits(y uint8) Mask64x2
// ToBits constructs a bitmap from a Mask64x2, where 1 means set for the indexed element, 0 means unset.
// Only the lower 2 bits of y are used.
//
-// Asm: KMOVQ, CPU Features: AVX512
+// Asm: VMOVMSKPD, CPU Features: AVX
func (x Mask64x2) ToBits() uint8
// v256 is a tag type that tells the compiler that this is really 256-bit SIMD
@@ -368,27 +368,27 @@ type v256 struct {
_256 [0]func() // uncomparable
}
-// Float32x8 is a 256-bit SIMD vector of 8 float32
+// Float32x8 is a 256-bit SIMD vector of 8 float32s.
type Float32x8 struct {
float32x8 v256
vals [8]float32
}
-// Len returns the number of elements in a Float32x8
+// Len returns the number of elements in a Float32x8.
func (x Float32x8) Len() int { return 8 }
-// LoadFloat32x8 loads a Float32x8 from an array
+// LoadFloat32x8 loads a Float32x8 from an array.
//
//go:noescape
func LoadFloat32x8(y *[8]float32) Float32x8
-// Store stores a Float32x8 to an array
+// Store stores a Float32x8 to an array.
//
//go:noescape
func (x Float32x8) Store(y *[8]float32)
// LoadMaskedFloat32x8 loads a Float32x8 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
@@ -396,34 +396,34 @@ func (x Float32x8) Store(y *[8]float32)
func LoadMaskedFloat32x8(y *[8]float32, mask Mask32x8) Float32x8
// StoreMasked stores a Float32x8 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Float32x8) StoreMasked(y *[8]float32, mask Mask32x8)
-// Float64x4 is a 256-bit SIMD vector of 4 float64
+// Float64x4 is a 256-bit SIMD vector of 4 float64s.
type Float64x4 struct {
float64x4 v256
vals [4]float64
}
-// Len returns the number of elements in a Float64x4
+// Len returns the number of elements in a Float64x4.
func (x Float64x4) Len() int { return 4 }
-// LoadFloat64x4 loads a Float64x4 from an array
+// LoadFloat64x4 loads a Float64x4 from an array.
//
//go:noescape
func LoadFloat64x4(y *[4]float64) Float64x4
-// Store stores a Float64x4 to an array
+// Store stores a Float64x4 to an array.
//
//go:noescape
func (x Float64x4) Store(y *[4]float64)
// LoadMaskedFloat64x4 loads a Float64x4 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
@@ -431,180 +431,180 @@ func (x Float64x4) Store(y *[4]float64)
func LoadMaskedFloat64x4(y *[4]float64, mask Mask64x4) Float64x4
// StoreMasked stores a Float64x4 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Float64x4) StoreMasked(y *[4]float64, mask Mask64x4)
-// Int8x32 is a 256-bit SIMD vector of 32 int8
+// Int8x32 is a 256-bit SIMD vector of 32 int8s.
type Int8x32 struct {
int8x32 v256
vals [32]int8
}
-// Len returns the number of elements in a Int8x32
+// Len returns the number of elements in an Int8x32.
func (x Int8x32) Len() int { return 32 }
-// LoadInt8x32 loads a Int8x32 from an array
+// LoadInt8x32 loads an Int8x32 from an array.
//
//go:noescape
func LoadInt8x32(y *[32]int8) Int8x32
-// Store stores a Int8x32 to an array
+// Store stores an Int8x32 to an array.
//
//go:noescape
func (x Int8x32) Store(y *[32]int8)
-// Int16x16 is a 256-bit SIMD vector of 16 int16
+// Int16x16 is a 256-bit SIMD vector of 16 int16s.
type Int16x16 struct {
int16x16 v256
vals [16]int16
}
-// Len returns the number of elements in a Int16x16
+// Len returns the number of elements in an Int16x16.
func (x Int16x16) Len() int { return 16 }
-// LoadInt16x16 loads a Int16x16 from an array
+// LoadInt16x16 loads an Int16x16 from an array.
//
//go:noescape
func LoadInt16x16(y *[16]int16) Int16x16
-// Store stores a Int16x16 to an array
+// Store stores an Int16x16 to an array.
//
//go:noescape
func (x Int16x16) Store(y *[16]int16)
-// Int32x8 is a 256-bit SIMD vector of 8 int32
+// Int32x8 is a 256-bit SIMD vector of 8 int32s.
type Int32x8 struct {
int32x8 v256
vals [8]int32
}
-// Len returns the number of elements in a Int32x8
+// Len returns the number of elements in an Int32x8.
func (x Int32x8) Len() int { return 8 }
-// LoadInt32x8 loads a Int32x8 from an array
+// LoadInt32x8 loads an Int32x8 from an array.
//
//go:noescape
func LoadInt32x8(y *[8]int32) Int32x8
-// Store stores a Int32x8 to an array
+// Store stores an Int32x8 to an array.
//
//go:noescape
func (x Int32x8) Store(y *[8]int32)
-// LoadMaskedInt32x8 loads a Int32x8 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt32x8 loads an Int32x8 from an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func LoadMaskedInt32x8(y *[8]int32, mask Mask32x8) Int32x8
-// StoreMasked stores a Int32x8 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int32x8 to an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Int32x8) StoreMasked(y *[8]int32, mask Mask32x8)
-// Int64x4 is a 256-bit SIMD vector of 4 int64
+// Int64x4 is a 256-bit SIMD vector of 4 int64s.
type Int64x4 struct {
int64x4 v256
vals [4]int64
}
-// Len returns the number of elements in a Int64x4
+// Len returns the number of elements in an Int64x4.
func (x Int64x4) Len() int { return 4 }
-// LoadInt64x4 loads a Int64x4 from an array
+// LoadInt64x4 loads an Int64x4 from an array.
//
//go:noescape
func LoadInt64x4(y *[4]int64) Int64x4
-// Store stores a Int64x4 to an array
+// Store stores an Int64x4 to an array.
//
//go:noescape
func (x Int64x4) Store(y *[4]int64)
-// LoadMaskedInt64x4 loads a Int64x4 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt64x4 loads an Int64x4 from an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func LoadMaskedInt64x4(y *[4]int64, mask Mask64x4) Int64x4
-// StoreMasked stores a Int64x4 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int64x4 to an array,
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Int64x4) StoreMasked(y *[4]int64, mask Mask64x4)
-// Uint8x32 is a 256-bit SIMD vector of 32 uint8
+// Uint8x32 is a 256-bit SIMD vector of 32 uint8s.
type Uint8x32 struct {
uint8x32 v256
vals [32]uint8
}
-// Len returns the number of elements in a Uint8x32
+// Len returns the number of elements in a Uint8x32.
func (x Uint8x32) Len() int { return 32 }
-// LoadUint8x32 loads a Uint8x32 from an array
+// LoadUint8x32 loads a Uint8x32 from an array.
//
//go:noescape
func LoadUint8x32(y *[32]uint8) Uint8x32
-// Store stores a Uint8x32 to an array
+// Store stores a Uint8x32 to an array.
//
//go:noescape
func (x Uint8x32) Store(y *[32]uint8)
-// Uint16x16 is a 256-bit SIMD vector of 16 uint16
+// Uint16x16 is a 256-bit SIMD vector of 16 uint16s.
type Uint16x16 struct {
uint16x16 v256
vals [16]uint16
}
-// Len returns the number of elements in a Uint16x16
+// Len returns the number of elements in a Uint16x16.
func (x Uint16x16) Len() int { return 16 }
-// LoadUint16x16 loads a Uint16x16 from an array
+// LoadUint16x16 loads a Uint16x16 from an array.
//
//go:noescape
func LoadUint16x16(y *[16]uint16) Uint16x16
-// Store stores a Uint16x16 to an array
+// Store stores a Uint16x16 to an array.
//
//go:noescape
func (x Uint16x16) Store(y *[16]uint16)
-// Uint32x8 is a 256-bit SIMD vector of 8 uint32
+// Uint32x8 is a 256-bit SIMD vector of 8 uint32s.
type Uint32x8 struct {
uint32x8 v256
vals [8]uint32
}
-// Len returns the number of elements in a Uint32x8
+// Len returns the number of elements in a Uint32x8.
func (x Uint32x8) Len() int { return 8 }
-// LoadUint32x8 loads a Uint32x8 from an array
+// LoadUint32x8 loads a Uint32x8 from an array.
//
//go:noescape
func LoadUint32x8(y *[8]uint32) Uint32x8
-// Store stores a Uint32x8 to an array
+// Store stores a Uint32x8 to an array.
//
//go:noescape
func (x Uint32x8) Store(y *[8]uint32)
// LoadMaskedUint32x8 loads a Uint32x8 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
@@ -612,34 +612,34 @@ func (x Uint32x8) Store(y *[8]uint32)
func LoadMaskedUint32x8(y *[8]uint32, mask Mask32x8) Uint32x8
// StoreMasked stores a Uint32x8 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVD, CPU Feature: AVX2
//
//go:noescape
func (x Uint32x8) StoreMasked(y *[8]uint32, mask Mask32x8)
-// Uint64x4 is a 256-bit SIMD vector of 4 uint64
+// Uint64x4 is a 256-bit SIMD vector of 4 uint64s.
type Uint64x4 struct {
uint64x4 v256
vals [4]uint64
}
-// Len returns the number of elements in a Uint64x4
+// Len returns the number of elements in a Uint64x4.
func (x Uint64x4) Len() int { return 4 }
-// LoadUint64x4 loads a Uint64x4 from an array
+// LoadUint64x4 loads a Uint64x4 from an array.
//
//go:noescape
func LoadUint64x4(y *[4]uint64) Uint64x4
-// Store stores a Uint64x4 to an array
+// Store stores a Uint64x4 to an array.
//
//go:noescape
func (x Uint64x4) Store(y *[4]uint64)
// LoadMaskedUint64x4 loads a Uint64x4 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
@@ -647,14 +647,14 @@ func (x Uint64x4) Store(y *[4]uint64)
func LoadMaskedUint64x4(y *[4]uint64, mask Mask64x4) Uint64x4
// StoreMasked stores a Uint64x4 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMASKMOVQ, CPU Feature: AVX2
//
//go:noescape
func (x Uint64x4) StoreMasked(y *[4]uint64, mask Mask64x4)
-// Mask8x32 is a 256-bit SIMD vector of 32 int8
+// Mask8x32 is a mask for a SIMD vector of 32 8-bit elements.
type Mask8x32 struct {
int8x32 v256
vals [32]int8
@@ -667,10 +667,10 @@ func Mask8x32FromBits(y uint32) Mask8x32
// ToBits constructs a bitmap from a Mask8x32, where 1 means set for the indexed element, 0 means unset.
//
-// Asm: KMOVB, CPU Features: AVX512
+// Asm: VPMOVMSKB, CPU Features: AVX2
func (x Mask8x32) ToBits() uint32
-// Mask16x16 is a 256-bit SIMD vector of 16 int16
+// Mask16x16 is a mask for a SIMD vector of 16 16-bit elements.
type Mask16x16 struct {
int16x16 v256
vals [16]int16
@@ -686,7 +686,7 @@ func Mask16x16FromBits(y uint16) Mask16x16
// Asm: KMOVW, CPU Features: AVX512
func (x Mask16x16) ToBits() uint16
-// Mask32x8 is a 256-bit SIMD vector of 8 int32
+// Mask32x8 is a mask for a SIMD vector of 8 32-bit elements.
type Mask32x8 struct {
int32x8 v256
vals [8]int32
@@ -699,10 +699,10 @@ func Mask32x8FromBits(y uint8) Mask32x8
// ToBits constructs a bitmap from a Mask32x8, where 1 means set for the indexed element, 0 means unset.
//
-// Asm: KMOVD, CPU Features: AVX512
+// Asm: VMOVMSKPS, CPU Features: AVX
func (x Mask32x8) ToBits() uint8
-// Mask64x4 is a 256-bit SIMD vector of 4 int64
+// Mask64x4 is a mask for a SIMD vector of 4 64-bit elements.
type Mask64x4 struct {
int64x4 v256
vals [4]int64
@@ -717,7 +717,7 @@ func Mask64x4FromBits(y uint8) Mask64x4
// ToBits constructs a bitmap from a Mask64x4, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
-// Asm: KMOVQ, CPU Features: AVX512
+// Asm: VMOVMSKPD, CPU Features: AVX
func (x Mask64x4) ToBits() uint8
// v512 is a tag type that tells the compiler that this is really 512-bit SIMD
@@ -725,27 +725,27 @@ type v512 struct {
_512 [0]func() // uncomparable
}
-// Float32x16 is a 512-bit SIMD vector of 16 float32
+// Float32x16 is a 512-bit SIMD vector of 16 float32s.
type Float32x16 struct {
float32x16 v512
vals [16]float32
}
-// Len returns the number of elements in a Float32x16
+// Len returns the number of elements in a Float32x16.
func (x Float32x16) Len() int { return 16 }
-// LoadFloat32x16 loads a Float32x16 from an array
+// LoadFloat32x16 loads a Float32x16 from an array.
//
//go:noescape
func LoadFloat32x16(y *[16]float32) Float32x16
-// Store stores a Float32x16 to an array
+// Store stores a Float32x16 to an array.
//
//go:noescape
func (x Float32x16) Store(y *[16]float32)
// LoadMaskedFloat32x16 loads a Float32x16 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32.Z, CPU Feature: AVX512
//
@@ -753,34 +753,34 @@ func (x Float32x16) Store(y *[16]float32)
func LoadMaskedFloat32x16(y *[16]float32, mask Mask32x16) Float32x16
// StoreMasked stores a Float32x16 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32, CPU Feature: AVX512
//
//go:noescape
func (x Float32x16) StoreMasked(y *[16]float32, mask Mask32x16)
-// Float64x8 is a 512-bit SIMD vector of 8 float64
+// Float64x8 is a 512-bit SIMD vector of 8 float64s.
type Float64x8 struct {
float64x8 v512
vals [8]float64
}
-// Len returns the number of elements in a Float64x8
+// Len returns the number of elements in a Float64x8.
func (x Float64x8) Len() int { return 8 }
-// LoadFloat64x8 loads a Float64x8 from an array
+// LoadFloat64x8 loads a Float64x8 from an array.
//
//go:noescape
func LoadFloat64x8(y *[8]float64) Float64x8
-// Store stores a Float64x8 to an array
+// Store stores a Float64x8 to an array.
//
//go:noescape
func (x Float64x8) Store(y *[8]float64)
// LoadMaskedFloat64x8 loads a Float64x8 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64.Z, CPU Feature: AVX512
//
@@ -788,174 +788,174 @@ func (x Float64x8) Store(y *[8]float64)
func LoadMaskedFloat64x8(y *[8]float64, mask Mask64x8) Float64x8
// StoreMasked stores a Float64x8 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64, CPU Feature: AVX512
//
//go:noescape
func (x Float64x8) StoreMasked(y *[8]float64, mask Mask64x8)
-// Int8x64 is a 512-bit SIMD vector of 64 int8
+// Int8x64 is a 512-bit SIMD vector of 64 int8s.
type Int8x64 struct {
int8x64 v512
vals [64]int8
}
-// Len returns the number of elements in a Int8x64
+// Len returns the number of elements in an Int8x64.
func (x Int8x64) Len() int { return 64 }
-// LoadInt8x64 loads a Int8x64 from an array
+// LoadInt8x64 loads an Int8x64 from an array.
//
//go:noescape
func LoadInt8x64(y *[64]int8) Int8x64
-// Store stores a Int8x64 to an array
+// Store stores an Int8x64 to an array.
//
//go:noescape
func (x Int8x64) Store(y *[64]int8)
-// LoadMaskedInt8x64 loads a Int8x64 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt8x64 loads an Int8x64 from an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU8.Z, CPU Feature: AVX512
//
//go:noescape
func LoadMaskedInt8x64(y *[64]int8, mask Mask8x64) Int8x64
-// StoreMasked stores a Int8x64 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int8x64 to an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU8, CPU Feature: AVX512
//
//go:noescape
func (x Int8x64) StoreMasked(y *[64]int8, mask Mask8x64)
-// Int16x32 is a 512-bit SIMD vector of 32 int16
+// Int16x32 is a 512-bit SIMD vector of 32 int16s.
type Int16x32 struct {
int16x32 v512
vals [32]int16
}
-// Len returns the number of elements in a Int16x32
+// Len returns the number of elements in an Int16x32.
func (x Int16x32) Len() int { return 32 }
-// LoadInt16x32 loads a Int16x32 from an array
+// LoadInt16x32 loads an Int16x32 from an array.
//
//go:noescape
func LoadInt16x32(y *[32]int16) Int16x32
-// Store stores a Int16x32 to an array
+// Store stores an Int16x32 to an array.
//
//go:noescape
func (x Int16x32) Store(y *[32]int16)
-// LoadMaskedInt16x32 loads a Int16x32 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt16x32 loads an Int16x32 from an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU16.Z, CPU Feature: AVX512
//
//go:noescape
func LoadMaskedInt16x32(y *[32]int16, mask Mask16x32) Int16x32
-// StoreMasked stores a Int16x32 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int16x32 to an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU16, CPU Feature: AVX512
//
//go:noescape
func (x Int16x32) StoreMasked(y *[32]int16, mask Mask16x32)
-// Int32x16 is a 512-bit SIMD vector of 16 int32
+// Int32x16 is a 512-bit SIMD vector of 16 int32s.
type Int32x16 struct {
int32x16 v512
vals [16]int32
}
-// Len returns the number of elements in a Int32x16
+// Len returns the number of elements in an Int32x16.
func (x Int32x16) Len() int { return 16 }
-// LoadInt32x16 loads a Int32x16 from an array
+// LoadInt32x16 loads an Int32x16 from an array.
//
//go:noescape
func LoadInt32x16(y *[16]int32) Int32x16
-// Store stores a Int32x16 to an array
+// Store stores an Int32x16 to an array.
//
//go:noescape
func (x Int32x16) Store(y *[16]int32)
-// LoadMaskedInt32x16 loads a Int32x16 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt32x16 loads an Int32x16 from an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32.Z, CPU Feature: AVX512
//
//go:noescape
func LoadMaskedInt32x16(y *[16]int32, mask Mask32x16) Int32x16
-// StoreMasked stores a Int32x16 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int32x16 to an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32, CPU Feature: AVX512
//
//go:noescape
func (x Int32x16) StoreMasked(y *[16]int32, mask Mask32x16)
-// Int64x8 is a 512-bit SIMD vector of 8 int64
+// Int64x8 is a 512-bit SIMD vector of 8 int64s.
type Int64x8 struct {
int64x8 v512
vals [8]int64
}
-// Len returns the number of elements in a Int64x8
+// Len returns the number of elements in an Int64x8.
func (x Int64x8) Len() int { return 8 }
-// LoadInt64x8 loads a Int64x8 from an array
+// LoadInt64x8 loads an Int64x8 from an array.
//
//go:noescape
func LoadInt64x8(y *[8]int64) Int64x8
-// Store stores a Int64x8 to an array
+// Store stores an Int64x8 to an array.
//
//go:noescape
func (x Int64x8) Store(y *[8]int64)
-// LoadMaskedInt64x8 loads a Int64x8 from an array,
-// at those elements enabled by mask
+// LoadMaskedInt64x8 loads an Int64x8 from an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64.Z, CPU Feature: AVX512
//
//go:noescape
func LoadMaskedInt64x8(y *[8]int64, mask Mask64x8) Int64x8
-// StoreMasked stores a Int64x8 to an array,
-// at those elements enabled by mask
+// StoreMasked stores an Int64x8 to an array,
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64, CPU Feature: AVX512
//
//go:noescape
func (x Int64x8) StoreMasked(y *[8]int64, mask Mask64x8)
-// Uint8x64 is a 512-bit SIMD vector of 64 uint8
+// Uint8x64 is a 512-bit SIMD vector of 64 uint8s.
type Uint8x64 struct {
uint8x64 v512
vals [64]uint8
}
-// Len returns the number of elements in a Uint8x64
+// Len returns the number of elements in a Uint8x64.
func (x Uint8x64) Len() int { return 64 }
-// LoadUint8x64 loads a Uint8x64 from an array
+// LoadUint8x64 loads a Uint8x64 from an array.
//
//go:noescape
func LoadUint8x64(y *[64]uint8) Uint8x64
-// Store stores a Uint8x64 to an array
+// Store stores a Uint8x64 to an array.
//
//go:noescape
func (x Uint8x64) Store(y *[64]uint8)
// LoadMaskedUint8x64 loads a Uint8x64 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU8.Z, CPU Feature: AVX512
//
@@ -963,34 +963,34 @@ func (x Uint8x64) Store(y *[64]uint8)
func LoadMaskedUint8x64(y *[64]uint8, mask Mask8x64) Uint8x64
// StoreMasked stores a Uint8x64 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU8, CPU Feature: AVX512
//
//go:noescape
func (x Uint8x64) StoreMasked(y *[64]uint8, mask Mask8x64)
-// Uint16x32 is a 512-bit SIMD vector of 32 uint16
+// Uint16x32 is a 512-bit SIMD vector of 32 uint16s.
type Uint16x32 struct {
uint16x32 v512
vals [32]uint16
}
-// Len returns the number of elements in a Uint16x32
+// Len returns the number of elements in a Uint16x32.
func (x Uint16x32) Len() int { return 32 }
-// LoadUint16x32 loads a Uint16x32 from an array
+// LoadUint16x32 loads a Uint16x32 from an array.
//
//go:noescape
func LoadUint16x32(y *[32]uint16) Uint16x32
-// Store stores a Uint16x32 to an array
+// Store stores a Uint16x32 to an array.
//
//go:noescape
func (x Uint16x32) Store(y *[32]uint16)
// LoadMaskedUint16x32 loads a Uint16x32 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU16.Z, CPU Feature: AVX512
//
@@ -998,34 +998,34 @@ func (x Uint16x32) Store(y *[32]uint16)
func LoadMaskedUint16x32(y *[32]uint16, mask Mask16x32) Uint16x32
// StoreMasked stores a Uint16x32 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU16, CPU Feature: AVX512
//
//go:noescape
func (x Uint16x32) StoreMasked(y *[32]uint16, mask Mask16x32)
-// Uint32x16 is a 512-bit SIMD vector of 16 uint32
+// Uint32x16 is a 512-bit SIMD vector of 16 uint32s.
type Uint32x16 struct {
uint32x16 v512
vals [16]uint32
}
-// Len returns the number of elements in a Uint32x16
+// Len returns the number of elements in a Uint32x16.
func (x Uint32x16) Len() int { return 16 }
-// LoadUint32x16 loads a Uint32x16 from an array
+// LoadUint32x16 loads a Uint32x16 from an array.
//
//go:noescape
func LoadUint32x16(y *[16]uint32) Uint32x16
-// Store stores a Uint32x16 to an array
+// Store stores a Uint32x16 to an array.
//
//go:noescape
func (x Uint32x16) Store(y *[16]uint32)
// LoadMaskedUint32x16 loads a Uint32x16 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32.Z, CPU Feature: AVX512
//
@@ -1033,34 +1033,34 @@ func (x Uint32x16) Store(y *[16]uint32)
func LoadMaskedUint32x16(y *[16]uint32, mask Mask32x16) Uint32x16
// StoreMasked stores a Uint32x16 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU32, CPU Feature: AVX512
//
//go:noescape
func (x Uint32x16) StoreMasked(y *[16]uint32, mask Mask32x16)
-// Uint64x8 is a 512-bit SIMD vector of 8 uint64
+// Uint64x8 is a 512-bit SIMD vector of 8 uint64s.
type Uint64x8 struct {
uint64x8 v512
vals [8]uint64
}
-// Len returns the number of elements in a Uint64x8
+// Len returns the number of elements in a Uint64x8.
func (x Uint64x8) Len() int { return 8 }
-// LoadUint64x8 loads a Uint64x8 from an array
+// LoadUint64x8 loads a Uint64x8 from an array.
//
//go:noescape
func LoadUint64x8(y *[8]uint64) Uint64x8
-// Store stores a Uint64x8 to an array
+// Store stores a Uint64x8 to an array.
//
//go:noescape
func (x Uint64x8) Store(y *[8]uint64)
// LoadMaskedUint64x8 loads a Uint64x8 from an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64.Z, CPU Feature: AVX512
//
@@ -1068,14 +1068,14 @@ func (x Uint64x8) Store(y *[8]uint64)
func LoadMaskedUint64x8(y *[8]uint64, mask Mask64x8) Uint64x8
// StoreMasked stores a Uint64x8 to an array,
-// at those elements enabled by mask
+// at those elements enabled by mask.
//
// Asm: VMOVDQU64, CPU Feature: AVX512
//
//go:noescape
func (x Uint64x8) StoreMasked(y *[8]uint64, mask Mask64x8)
-// Mask8x64 is a 512-bit SIMD vector of 64 int8
+// Mask8x64 is a mask for a SIMD vector of 64 8-bit elements.
type Mask8x64 struct {
int8x64 v512
vals [64]int8
@@ -1091,7 +1091,7 @@ func Mask8x64FromBits(y uint64) Mask8x64
// Asm: KMOVB, CPU Features: AVX512
func (x Mask8x64) ToBits() uint64
-// Mask16x32 is a 512-bit SIMD vector of 32 int16
+// Mask16x32 is a mask for a SIMD vector of 32 16-bit elements.
type Mask16x32 struct {
int16x32 v512
vals [32]int16
@@ -1107,7 +1107,7 @@ func Mask16x32FromBits(y uint32) Mask16x32
// Asm: KMOVW, CPU Features: AVX512
func (x Mask16x32) ToBits() uint32
-// Mask32x16 is a 512-bit SIMD vector of 16 int32
+// Mask32x16 is a mask for a SIMD vector of 16 32-bit elements.
type Mask32x16 struct {
int32x16 v512
vals [16]int32
@@ -1123,7 +1123,7 @@ func Mask32x16FromBits(y uint16) Mask32x16
// Asm: KMOVD, CPU Features: AVX512
func (x Mask32x16) ToBits() uint16
-// Mask64x8 is a 512-bit SIMD vector of 8 int64
+// Mask64x8 is a mask for a SIMD vector of 8 64-bit elements.
type Mask64x8 struct {
int64x8 v512
vals [8]int64
diff --git a/src/simd/archsimd/unsafe_helpers.go b/src/simd/archsimd/unsafe_helpers.go
index 0123ad77c5..7b98053e70 100644
--- a/src/simd/archsimd/unsafe_helpers.go
+++ b/src/simd/archsimd/unsafe_helpers.go
@@ -1,4 +1,4 @@
-// Code generated by 'go run genfiles.go'; DO NOT EDIT.
+// Code generated by 'tmplgen'; DO NOT EDIT.
//go:build goexperiment.simd
diff --git a/src/time/export_test.go b/src/time/export_test.go
index a4940d12f9..78ce2ad00d 100644
--- a/src/time/export_test.go
+++ b/src/time/export_test.go
@@ -40,6 +40,7 @@ var (
Tzset = tzset
TzsetName = tzsetName
TzsetOffset = tzsetOffset
+ AsynctimerChan = asynctimerchan
)
func LoadFromEmbeddedTZData(zone string) (string, error) {
diff --git a/src/time/tick_test.go b/src/time/tick_test.go
index dcbbcdb145..9b39d28143 100644
--- a/src/time/tick_test.go
+++ b/src/time/tick_test.go
@@ -266,6 +266,10 @@ func BenchmarkTickerResetNaive(b *testing.B) {
}
func TestTimerGC(t *testing.T) {
+ if AsynctimerChan.Value() == "1" {
+ t.Skip("skipping TestTimerGC with asynctimerchan=1")
+ }
+
run := func(t *testing.T, what string, f func()) {
t.Helper()
t.Run(what, func(t *testing.T) {
diff --git a/test/cmplxdivide.go b/test/cmplxdivide.go
index 49cd5bf582..4b8d549fc4 100644
--- a/test/cmplxdivide.go
+++ b/test/cmplxdivide.go
@@ -35,7 +35,7 @@ func main() {
fmt.Printf("BUG\n")
bad = true
}
- fmt.Printf("%v/%v: expected %v error; got %v\n", t.f, t.g, t.out, x)
+ fmt.Printf("%v/%v: got %v, want %v\n", t.f, t.g, x, t.out)
}
}
if bad {
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 39969dcdb2..d9c567b078 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -8,274 +8,387 @@ package codegen
import "math/bits"
-/************************************
- * 64-bit instructions
- ************************************/
+//
+// 64 bit instructions
+//
-func bitcheck64_constleft(a uint64) (n int) {
- // amd64:"BTQ [$]63"
+func bitsCheckConstLeftShiftU64(a uint64) (n int) {
+ // amd64:"BTQ [$]63,"
+ // arm64:"TBNZ [$]63,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&(1<<63) != 0 {
return 1
}
- // amd64:"BTQ [$]60"
+ // amd64:"BTQ [$]60,"
+ // arm64:"TBNZ [$]60,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&(1<<60) != 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ [$]0,"
+ // riscv64:"ANDI" "BEQZ"
if a&(1<<0) != 0 {
return 1
}
return 0
}
-func bitcheck64_constright(a [8]uint64) (n int) {
- // amd64:"BTQ [$]63"
+func bitsCheckConstRightShiftU64(a [8]uint64) (n int) {
+ // amd64:"BTQ [$]63,"
+ // arm64:"LSR [$]63," "TBNZ [$]0,"
+ // riscv64:"SRLI" "ANDI" "BNEZ"
if (a[0]>>63)&1 != 0 {
return 1
}
- // amd64:"BTQ [$]63"
+ // amd64:"BTQ [$]63,"
+ // arm64:"LSR [$]63," "CBNZ"
+ // riscv64:"SRLI" "BNEZ"
if a[1]>>63 != 0 {
return 1
}
- // amd64:"BTQ [$]63"
+ // amd64:"BTQ [$]63,"
+ // arm64:"LSR [$]63," "CBZ"
+ // riscv64:"SRLI" "BEQZ"
if a[2]>>63 == 0 {
return 1
}
- // amd64:"BTQ [$]60"
+ // amd64:"BTQ [$]60,"
+ // arm64:"LSR [$]60," "TBZ [$]0,"
+ // riscv64:"SRLI", "ANDI" "BEQZ"
if (a[3]>>60)&1 == 0 {
return 1
}
- // amd64:"BTL [$]1"
+ // amd64:"BTL [$]1,"
+ // arm64:"LSR [$]1," "TBZ [$]0,"
+ // riscv64:"SRLI" "ANDI" "BEQZ"
if (a[4]>>1)&1 == 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ [$]0," -"LSR"
+ // riscv64:"ANDI" "BEQZ" -"SRLI"
if (a[5]>>0)&1 == 0 {
return 1
}
- // amd64:"BTL [$]7"
+ // amd64:"BTL [$]7,"
+ // arm64:"LSR [$]5," "TBNZ [$]2,"
+ // riscv64:"SRLI" "ANDI" "BNEZ"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
-func bitcheck64_var(a, b uint64) (n int) {
+func bitsCheckVarU64(a, b uint64) (n int) {
// amd64:"BTQ"
+ // arm64:"MOVD [$]1," "LSL" "TST"
+ // riscv64:"ANDI [$]63," "SLL " "AND "
if a&(1<<(b&63)) != 0 {
return 1
}
- // amd64:"BTQ" -"BT. [$]0"
+ // amd64:"BTQ" -"BT. [$]0,"
+ // arm64:"LSR" "TBZ [$]0,"
+ // riscv64:"ANDI [$]63," "SRL" "ANDI [$]1,"
if (b>>(a&63))&1 != 0 {
return 1
}
return 0
}
-func bitcheck64_mask(a uint64) (n int) {
- // amd64:"BTQ [$]63"
+func bitsCheckMaskU64(a uint64) (n int) {
+ // amd64:"BTQ [$]63,"
+ // arm64:"TBNZ [$]63,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&0x8000000000000000 != 0 {
return 1
}
- // amd64:"BTQ [$]59"
+ // amd64:"BTQ [$]59,"
+ // arm64:"TBNZ [$]59,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&0x800000000000000 != 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ [$]0,"
+ // riscv64:"ANDI" "BEQZ"
if a&0x1 != 0 {
return 1
}
return 0
}
-func biton64(a, b uint64) (n uint64) {
+func bitsSetU64(a, b uint64) (n uint64) {
// amd64:"BTSQ"
+ // arm64:"MOVD [$]1," "LSL" "ORR"
+ // riscv64:"ANDI" "SLL" "OR"
n += b | (1 << (a & 63))
- // amd64:"BTSQ [$]63"
+ // amd64:"BTSQ [$]63,"
+ // arm64:"ORR [$]-9223372036854775808,"
+ // riscv64:"MOV [$]" "OR "
n += a | (1 << 63)
- // amd64:"BTSQ [$]60"
+ // amd64:"BTSQ [$]60,"
+ // arm64:"ORR [$]1152921504606846976,"
+ // riscv64:"MOV [$]" "OR "
n += a | (1 << 60)
- // amd64:"ORQ [$]1"
+ // amd64:"ORQ [$]1,"
+ // arm64:"ORR [$]1,"
+ // riscv64:"ORI"
n += a | (1 << 0)
return n
}
-func bitoff64(a, b uint64) (n uint64) {
+func bitsClearU64(a, b uint64) (n uint64) {
// amd64:"BTRQ"
+ // arm64:"MOVD [$]1," "LSL" "BIC"
+ // riscv64:"ANDI" "SLL" "ANDN"
n += b &^ (1 << (a & 63))
- // amd64:"BTRQ [$]63"
+ // amd64:"BTRQ [$]63,"
+ // arm64:"AND [$]9223372036854775807,"
+ // riscv64:"MOV [$]" "AND "
n += a &^ (1 << 63)
- // amd64:"BTRQ [$]60"
+ // amd64:"BTRQ [$]60,"
+ // arm64:"AND [$]-1152921504606846977,"
+ // riscv64:"MOV [$]" "AND "
n += a &^ (1 << 60)
// amd64:"ANDQ [$]-2"
+ // arm64:"AND [$]-2"
+ // riscv64:"ANDI [$]-2"
n += a &^ (1 << 0)
return n
}
-func clearLastBit(x int64, y int32) (int64, int32) {
- // amd64:"ANDQ [$]-2"
+func bitsClearLowest(x int64, y int32) (int64, int32) {
+ // amd64:"ANDQ [$]-2,"
+ // arm64:"AND [$]-2,"
+ // riscv64:"ANDI [$]-2,"
a := (x >> 1) << 1
- // amd64:"ANDL [$]-2"
+ // amd64:"ANDL [$]-2,"
+ // arm64:"AND [$]-2,"
+ // riscv64:"ANDI [$]-2,"
b := (y >> 1) << 1
return a, b
}
-func bitcompl64(a, b uint64) (n uint64) {
+func bitsFlipU64(a, b uint64) (n uint64) {
// amd64:"BTCQ"
+ // arm64:"MOVD [$]1," "LSL" "EOR"
+ // riscv64:"ANDI" "SLL" "XOR "
n += b ^ (1 << (a & 63))
- // amd64:"BTCQ [$]63"
+ // amd64:"BTCQ [$]63,"
+ // arm64:"EOR [$]-9223372036854775808,"
+ // riscv64:"MOV [$]" "XOR "
n += a ^ (1 << 63)
- // amd64:"BTCQ [$]60"
+ // amd64:"BTCQ [$]60,"
+ // arm64:"EOR [$]1152921504606846976,"
+ // riscv64:"MOV [$]" "XOR "
n += a ^ (1 << 60)
- // amd64:"XORQ [$]1"
+ // amd64:"XORQ [$]1,"
+ // arm64:"EOR [$]1,"
+ // riscv64:"XORI [$]1,"
n += a ^ (1 << 0)
return n
}
-/************************************
- * 32-bit instructions
- ************************************/
+//
+// 32 bit instructions
+//
-func bitcheck32_constleft(a uint32) (n int) {
- // amd64:"BTL [$]31"
+func bitsCheckConstShiftLeftU32(a uint32) (n int) {
+ // amd64:"BTL [$]31,"
+ // arm64:"TBNZ [$]31,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&(1<<31) != 0 {
return 1
}
- // amd64:"BTL [$]28"
+ // amd64:"BTL [$]28,"
+ // arm64:"TBNZ [$]28,"
+ // riscv64:"ANDI" "BNEZ"
if a&(1<<28) != 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ [$]0,"
+ // riscv64:"ANDI" "BEQZ"
if a&(1<<0) != 0 {
return 1
}
return 0
}
-func bitcheck32_constright(a [8]uint32) (n int) {
- // amd64:"BTL [$]31"
+func bitsCheckConstRightShiftU32(a [8]uint32) (n int) {
+ // amd64:"BTL [$]31,"
+ // arm64:"UBFX [$]31," "CBNZW"
+ // riscv64:"SRLI" "ANDI" "BNEZ"
if (a[0]>>31)&1 != 0 {
return 1
}
- // amd64:"BTL [$]31"
+ // amd64:"BTL [$]31,"
+ // arm64:"UBFX [$]31," "CBNZW"
+ // riscv64:"SRLI" "BNEZ"
if a[1]>>31 != 0 {
return 1
}
- // amd64:"BTL [$]31"
+ // amd64:"BTL [$]31,"
+ // arm64:"UBFX [$]31," "CBZW"
+ // riscv64:"SRLI" "BEQZ"
if a[2]>>31 == 0 {
return 1
}
- // amd64:"BTL [$]28"
+ // amd64:"BTL [$]28,"
+ // arm64:"UBFX [$]28," "TBZ"
+ // riscv64:"SRLI" "ANDI" "BEQZ"
if (a[3]>>28)&1 == 0 {
return 1
}
- // amd64:"BTL [$]1"
+ // amd64:"BTL [$]1,"
+ // arm64:"UBFX [$]1," "TBZ"
+ // riscv64:"SRLI" "ANDI" "BEQZ"
if (a[4]>>1)&1 == 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ" -"UBFX" -"SRL"
+ // riscv64:"ANDI" "BEQZ" -"SRLI "
if (a[5]>>0)&1 == 0 {
return 1
}
- // amd64:"BTL [$]7"
+ // amd64:"BTL [$]7,"
+ // arm64:"UBFX [$]5," "TBNZ"
+ // riscv64:"SRLI" "ANDI" "BNEZ"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
-func bitcheck32_var(a, b uint32) (n int) {
+func bitsCheckVarU32(a, b uint32) (n int) {
// amd64:"BTL"
+ // arm64:"AND [$]31," "MOVD [$]1," "LSL" "TSTW"
+ // riscv64:"ANDI [$]31," "SLL " "AND "
if a&(1<<(b&31)) != 0 {
return 1
}
// amd64:"BTL" -"BT. [$]0"
+ // arm64:"AND [$]31," "LSR" "TBZ"
+ // riscv64:"ANDI [$]31," "SRLW " "ANDI [$]1,"
if (b>>(a&31))&1 != 0 {
return 1
}
return 0
}
-func bitcheck32_mask(a uint32) (n int) {
- // amd64:"BTL [$]31"
+func bitsCheckMaskU32(a uint32) (n int) {
+ // amd64:"BTL [$]31,"
+ // arm64:"TBNZ [$]31,"
+ // riscv64:"MOV [$]" "AND" "BNEZ"
if a&0x80000000 != 0 {
return 1
}
- // amd64:"BTL [$]27"
+ // amd64:"BTL [$]27,"
+ // arm64:"TBNZ [$]27,"
+ // riscv64:"ANDI" "BNEZ"
if a&0x8000000 != 0 {
return 1
}
- // amd64:"BTL [$]0"
+ // amd64:"BTL [$]0,"
+ // arm64:"TBZ [$]0,"
+ // riscv64:"ANDI" "BEQZ"
if a&0x1 != 0 {
return 1
}
return 0
}
-func biton32(a, b uint32) (n uint32) {
+func bitsSetU32(a, b uint32) (n uint32) {
// amd64:"BTSL"
+ // arm64:"AND [$]31," "MOVD [$]1," "LSL" "ORR"
+ // riscv64:"ANDI" "SLL" "OR"
n += b | (1 << (a & 31))
- // amd64:"ORL [$]-2147483648"
+ // amd64:"ORL [$]-2147483648,"
+ // arm64:"ORR [$]-2147483648,"
+ // riscv64:"ORI [$]-2147483648,"
n += a | (1 << 31)
- // amd64:"ORL [$]268435456"
+ // amd64:"ORL [$]268435456,"
+ // arm64:"ORR [$]268435456,"
+ // riscv64:"ORI [$]268435456,"
n += a | (1 << 28)
- // amd64:"ORL [$]1"
+ // amd64:"ORL [$]1,"
+ // arm64:"ORR [$]1,"
+ // riscv64:"ORI [$]1,"
n += a | (1 << 0)
return n
}
-func bitoff32(a, b uint32) (n uint32) {
+func bitsClearU32(a, b uint32) (n uint32) {
// amd64:"BTRL"
+ // arm64:"AND [$]31," "MOVD [$]1," "LSL" "BIC"
+ // riscv64:"ANDI" "SLL" "ANDN"
n += b &^ (1 << (a & 31))
- // amd64:"ANDL [$]2147483647"
+ // amd64:"ANDL [$]2147483647,"
+ // arm64:"AND [$]2147483647,"
+ // riscv64:"ANDI [$]2147483647,"
n += a &^ (1 << 31)
- // amd64:"ANDL [$]-268435457"
+ // amd64:"ANDL [$]-268435457,"
+ // arm64:"AND [$]-268435457,"
+ // riscv64:"ANDI [$]-268435457,"
n += a &^ (1 << 28)
- // amd64:"ANDL [$]-2"
+ // amd64:"ANDL [$]-2,"
+ // arm64:"AND [$]-2,"
+ // riscv64:"ANDI [$]-2,"
n += a &^ (1 << 0)
return n
}
-func bitcompl32(a, b uint32) (n uint32) {
+func bitsFlipU32(a, b uint32) (n uint32) {
// amd64:"BTCL"
+ // arm64:"AND [$]31," "MOVD [$]1," "LSL" "EOR"
+ // riscv64:"ANDI" "SLL" "XOR "
n += b ^ (1 << (a & 31))
- // amd64:"XORL [$]-2147483648"
+ // amd64:"XORL [$]-2147483648,"
+ // arm64:"EOR [$]-2147483648,"
+ // riscv64:"XORI [$]-2147483648,"
n += a ^ (1 << 31)
- // amd64:"XORL [$]268435456"
+ // amd64:"XORL [$]268435456,"
+ // arm64:"EOR [$]268435456,"
+ // riscv64:"XORI [$]268435456,"
n += a ^ (1 << 28)
- // amd64:"XORL [$]1"
+ // amd64:"XORL [$]1,"
+ // arm64:"EOR [$]1,"
+ // riscv64:"XORI [$]1,"
n += a ^ (1 << 0)
return n
}
-// check direct operation on memory with constant and shifted constant sources
-func bitOpOnMem(a []uint32, b, c, d uint32) {
+func bitsOpOnMem(a []uint32, b, c, d uint32) {
+ // check direct operation on memory with constant
+
// amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)`
a[0] &= 200
// amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)`
@@ -284,24 +397,24 @@ func bitOpOnMem(a []uint32, b, c, d uint32) {
a[2] ^= 240
}
-func bitcheckMostNegative(b uint8) bool {
+func bitsCheckMostNegative(b uint8) bool {
// amd64:"TESTB"
+ // arm64:"TSTW" "CSET"
+ // riscv64:"ANDI [$]128," "SNEZ" -"ADDI"
return b&0x80 == 0x80
}
-// Check AND masking on arm64 (Issue #19857)
-
-func and_mask_1(a uint64) uint64 {
+func bitsIssue19857a(a uint64) uint64 {
// arm64:`AND `
return a & ((1 << 63) - 1)
}
-func and_mask_2(a uint64) uint64 {
+func bitsIssue19857b(a uint64) uint64 {
// arm64:`AND `
return a & (1 << 63)
}
-func and_mask_3(a, b uint32) (uint32, uint32) {
+func bitsIssue19857c(a, b uint32) (uint32, uint32) {
// arm/7:`BIC`,-`AND`
a &= 0xffffaaaa
// arm/7:`BFC`,-`AND`,-`BIC`
@@ -309,34 +422,39 @@ func and_mask_3(a, b uint32) (uint32, uint32) {
return a, b
}
-// Check generation of arm64 BIC/EON/ORN instructions
-
-func op_bic(x, y uint32) uint32 {
+func bitsAndNot(x, y uint32) uint32 {
// arm64:`BIC `,-`AND`
+ // loong64:"ANDN " -"AND "
+ // riscv64:"ANDN" -"AND "
return x &^ y
}
-func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 {
+func bitsXorNot(x, y, z uint32, a []uint32, n, m uint64) uint64 {
// arm64:`EON `,-`EOR`,-`MVN`
+ // riscv64:"XNOR " -"MOV [$]" -"XOR"
a[0] = x ^ (y ^ 0xffffffff)
// arm64:`EON `,-`EOR`,-`MVN`
+ // riscv64:"XNOR" -"XOR"
a[1] = ^(y ^ z)
// arm64:`EON `,-`XOR`
+ // riscv64:"XNOR" -"XOR" -"NOT"
a[2] = x ^ ^z
// arm64:`EON `,-`EOR`,-`MVN`
+ // riscv64:"XNOR" -"MOV [$]" -"XOR"
return n ^ (m ^ 0xffffffffffffffff)
}
-func op_orn(x, y uint32) uint32 {
- // arm64:`ORN `,-`ORR`
- // loong64:"ORN" ,-"OR "
+func bitsOrNot(x, y uint32) uint32 {
+ // arm64:"ORN " -"ORR"
+ // loong64:"ORN" -"OR "
+ // riscv64:"ORN" -"OR "
return x | ^y
}
-func op_nor(x int64, a []int64) {
+func bitsNotOr(x int64, a []int64) {
// loong64: "MOVV [$]0" "NOR R"
a[0] = ^(0x1234 | x)
// loong64:"NOR" -"XOR"
@@ -345,64 +463,60 @@ func op_nor(x int64, a []int64) {
a[2] = ^(0x12 | 0x34)
}
-func op_andn(x, y uint32) uint32 {
- // loong64:"ANDN " -"AND "
- return x &^ y
-}
-
-// check bitsets
-func bitSetPowerOf2Test(x int) bool {
+func bitsSetPowerOf2Test(x int) bool {
// amd64:"BTL [$]3"
+ // riscv64:"ANDI [$]8," "SNEZ" -"ADDI"
return x&8 == 8
}
-func bitSetTest(x int) bool {
+func bitsSetTest(x int) bool {
// amd64:"ANDL [$]9, AX"
// amd64:"CMPQ AX, [$]9"
+ // riscv64:"ANDI [$]9," "ADDI [$]-9," "SEQZ"
return x&9 == 9
}
-// mask contiguous one bits
-func cont1Mask64U(x uint64) uint64 {
+func bitsMaskContiguousOnes64U(x uint64) uint64 {
// s390x:"RISBGZ [$]16, [$]47, [$]0,"
return x & 0x0000ffffffff0000
}
-// mask contiguous zero bits
-func cont0Mask64U(x uint64) uint64 {
+func bitsMaskContiguousZeroes64U(x uint64) uint64 {
// s390x:"RISBGZ [$]48, [$]15, [$]0,"
return x & 0xffff00000000ffff
}
-func issue44228a(a []int64, i int) bool {
+func bitsIssue44228a(a []int64, i int) bool {
// amd64: "BTQ", -"SHL"
return a[i>>6]&(1<<(i&63)) != 0
}
-func issue44228b(a []int32, i int) bool {
+
+func bitsIssue44228b(a []int32, i int) bool {
// amd64: "BTL", -"SHL"
return a[i>>5]&(1<<(i&31)) != 0
}
-func issue48467(x, y uint64) uint64 {
+func bitsIssue48467(x, y uint64) uint64 {
// arm64: -"NEG"
d, borrow := bits.Sub64(x, y, 0)
return x - d&(-borrow)
}
-func foldConst(x, y uint64) uint64 {
+func bitsFoldConst(x, y uint64) uint64 {
// arm64: "ADDS [$]7" -"MOVD [$]7"
// ppc64x: "ADDC [$]7,"
d, b := bits.Add64(x, 7, 0)
return b & d
}
-func foldConstOutOfRange(a uint64) uint64 {
+func bitsFoldConstOutOfRange(a uint64) uint64 {
// arm64: "MOVD [$]19088744" -"ADD [$]19088744"
return a + 0x1234568
}
-// Verify sign-extended values are not zero-extended under a bit mask (#61297)
-func signextendAndMask8to64(a int8) (s, z uint64) {
+func bitsSignExtendAndMask8to64U(a int8) (s, z uint64) {
+ // Verify sign-extended values are not zero-extended under a bit mask (#61297)
+
// ppc64x: "MOVB", "ANDCC [$]1015,"
s = uint64(a) & 0x3F7
// ppc64x: -"MOVB", "ANDCC [$]247,"
@@ -410,8 +524,9 @@ func signextendAndMask8to64(a int8) (s, z uint64) {
return
}
-// Verify zero-extended values are not sign-extended under a bit mask (#61297)
-func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
+func bitsZeroExtendAndMask8toU64(a int8, b int16) (x, y uint64) {
+ // Verify zero-extended values are not sign-extended under a bit mask (#61297)
+
// ppc64x: -"MOVB ", -"ANDCC", "MOVBZ"
x = uint64(a) & 0xFF
// ppc64x: -"MOVH ", -"ANDCC", "MOVHZ"
@@ -419,8 +534,9 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) {
return
}
-// Verify rotate and mask instructions, and further simplified instructions for small types
-func bitRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+func bitsRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) {
+ // Verify rotate and mask instructions, and further simplified instructions for small types
+
// ppc64x: "RLDICR [$]0, R[0-9]*, [$]47, R"
io64[0] = io64[0] & 0xFFFFFFFFFFFF0000
// ppc64x: "RLDICL [$]0, R[0-9]*, [$]16, R"
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index bcce21e404..0b550adc05 100644
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -660,13 +660,13 @@ func equalVarString8(a string) bool {
return a[:8] == b
}
-func equalVarStringNoSpill(a,b string) bool {
+func equalVarStringNoSpill(a, b string) bool {
s := string("ZZZZZZZZZ")
// arm64:".*memequal"
memeq1 := a[:9] == s
// arm64:-".*"
memeq2 := s == a[:9]
- // arm64:-"MOVB\tR0,.*SP",".*memequal"
+ // arm64:-"MOVB R0,.*SP",".*memequal"
memeq3 := s == b[:9]
return memeq1 && memeq2 && memeq3
}
diff --git a/test/codegen/simd.go b/test/codegen/simd.go
index 8f3a1a9f46..04e01944de 100644
--- a/test/codegen/simd.go
+++ b/test/codegen/simd.go
@@ -6,11 +6,14 @@
// These tests check code generation of simd peephole optimizations.
-//go:build goexperiment.simd
+//go:build goexperiment.simd && amd64
package codegen
-import "simd/archsimd"
+import (
+ "math"
+ "simd/archsimd"
+)
func vptest1() bool {
v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
@@ -77,3 +80,27 @@ func simdMaskedMerge() archsimd.Int16x16 {
mask := archsimd.Mask16x16FromBits(5)
return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$`
}
+
+var nan = math.NaN()
+var floats64s = []float64{0, 1, 2, nan, 4, nan, 6, 7, 8, 9, 10, 11, nan, 13, 14, 15}
+var sinkInt64s = make([]int64, 100)
+
+func simdIsNaN() {
+ x := archsimd.LoadFloat64x4Slice(floats64s)
+ y := archsimd.LoadFloat64x4Slice(floats64s[4:])
+ a := x.IsNaN()
+ b := y.IsNaN()
+ // amd64:"VCMPPD [$]3," -"VPOR"
+ c := a.Or(b)
+ c.ToInt64x4().StoreSlice(sinkInt64s)
+}
+
+func simdIsNaN512() {
+ x := archsimd.LoadFloat64x8Slice(floats64s)
+ y := archsimd.LoadFloat64x8Slice(floats64s[8:])
+ a := x.IsNaN()
+ b := y.IsNaN()
+ // amd64:"VCMPPD [$]3," -"VPOR"
+ c := a.Or(b)
+ c.ToInt64x8().StoreSlice(sinkInt64s)
+}
diff --git a/test/fixedbugs/issue76950.go b/test/fixedbugs/issue76950.go
new file mode 100644
index 0000000000..b5716e0fc6
--- /dev/null
+++ b/test/fixedbugs/issue76950.go
@@ -0,0 +1,67 @@
+// compile
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package p
+
+func MatchLog(input string) bool {
+ pos := 0
+ n := len(input)
+ matchState := -1
+ var c byte
+
+ goto State12
+
+State8:
+ goto State65
+
+State12:
+ if pos >= n {
+ goto End
+ }
+ c = input[pos]
+ switch {
+ case c >= 0x09 && c <= 0x0A || c >= 0x0C && c <= 0x0D || c == ' ':
+ case c >= '0' && c <= '9':
+ case c >= 'A' && c <= 'Z' || c == '_' || c >= 'b' && c <= 'z':
+ case c == '[':
+ goto State8
+ case c == 'a':
+ default:
+ goto End
+ }
+
+State64:
+ matchState = 179
+ if pos >= n {
+ goto End
+ }
+ pos = n
+ goto State64
+
+State65:
+
+State66:
+ matchState = 181
+ if pos >= n {
+ goto End
+ }
+ pos = n
+ goto State66
+
+End:
+ if matchState != -1 {
+ switch matchState {
+ case 178:
+ case 156:
+ case 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175:
+ case 176, 177, 181, 182, 183:
+ case 179, 184:
+ case 180:
+ }
+ return true
+ }
+ return false
+}
diff --git a/test/map.go b/test/map.go
index 2c1cf8a140..b72fe59bd7 100644
--- a/test/map.go
+++ b/test/map.go
@@ -431,7 +431,7 @@ func testbasic() {
{
_, b := mpTi[apT[i]]
if b {
- panic(fmt.Sprintf("tuple nonexistence decl: mpTi[apt[%d]]", i))
+ panic(fmt.Sprintf("tuple nonexistence decl: mpTi[apT[%d]]", i))
}
_, b = mpTi[apT[i]]
if b {
diff --git a/test/stringrange.go b/test/stringrange.go
index 99e5edb5a4..d98013b876 100644
--- a/test/stringrange.go
+++ b/test/stringrange.go
@@ -59,7 +59,7 @@ func main() {
for _, c := range "a\xed\xa0\x80a" {
if c != 'a' && c != utf8.RuneError {
- fmt.Printf("surrogate UTF-8 does not error: %U\n", c)
+ fmt.Printf("surrogate UTF-8 does not produce an error: %U\n", c)
ok = false
}
}
diff --git a/test/typeparam/typelist.go b/test/typeparam/typelist.go
index cd8ef7d6e7..b3226301fb 100644
--- a/test/typeparam/typelist.go
+++ b/test/typeparam/typelist.go
@@ -32,7 +32,7 @@ func _[T interface{ ~int }](x T) {
var _ T = T(myint(42))
}
-// Indexing a generic type which has a an array as core type.
+// Indexing a generic type which has an array as core type.
func _[T interface{ ~[10]int }](x T) {
_ = x[9] // ok
}
diff --git a/test/uintptrescapes.dir/main.go b/test/uintptrescapes.dir/main.go
index afda6218ad..0ccb18f9ff 100644
--- a/test/uintptrescapes.dir/main.go
+++ b/test/uintptrescapes.dir/main.go
@@ -49,7 +49,7 @@ func main() {
defer wg.Done()
b := F1()
if b != 42 {
- fmt.Printf("F1: got %d, expected 42\n", b)
+ fmt.Printf("F1: got %d, want 42\n", b)
c <- false
}
}()
@@ -58,7 +58,7 @@ func main() {
defer wg.Done()
b := F2()
if b != 42 {
- fmt.Printf("F2: got %d, expected 42\n", b)
+ fmt.Printf("F2: got %d, want 42\n", b)
c <- false
}
}()
@@ -67,7 +67,7 @@ func main() {
defer wg.Done()
b := M1()
if b != 42 {
- fmt.Printf("M1: got %d, expected 42\n", b)
+ fmt.Printf("M1: got %d, want 42\n", b)
c <- false
}
}()
@@ -76,7 +76,7 @@ func main() {
defer wg.Done()
b := M2()
if b != 42 {
- fmt.Printf("M2: got %d, expected 42\n", b)
+ fmt.Printf("M2: got %d, want 42\n", b)
c <- false
}
}()