diff options
| author | Junyang Shao <shaojunyang@google.com> | 2026-01-07 20:06:48 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2026-01-07 20:06:49 +0000 |
| commit | b8191a2f9893220bdbe52ecebb37e293847d98f5 (patch) | |
| tree | ffaec06811834d36737d182a65831d65cd8ce798 | |
| parent | c599a8f2385849a225d02843b3c6389dbfc5aa69 (diff) | |
| parent | f6ebd91129e13ef7f495550a4fc8fa74769f6a2d (diff) | |
| download | go-b8191a2f9893220bdbe52ecebb37e293847d98f5.tar.xz | |
[release-branch.go1.26] all: merge master (f6ebd91) into release-branch.go1.26
Merge List:
+ 2026-01-07 f6ebd91129 all: update vendored x/tools
+ 2026-01-06 d1d0fc7a97 os/exec: avoid atomic.Bool for Cmd.startCalled
+ 2026-01-05 9b2e3b9a02 simd/archsimd: use V(P)MOVMSK for mask ToBits if possible
+ 2026-01-02 f8ee0f8475 cmd/go/testdata/vcstest/git: use git commands that work on older git versions
+ 2026-01-02 b094749bad test/codegen: codify bit related code generation for arm64
+ 2026-01-02 e84983fa40 cmd/compile: optimize SIMD IsNaN.Or(IsNaN)
+ 2026-01-02 8244b85677 simd/archsimd: add tests for IsNaN
+ 2026-01-02 13440fb518 simd/archsimd: make IsNaN unary
+ 2026-01-02 c3550b3352 simd/archsimd: correct documentation of Mask types
+ 2026-01-02 34ad26341d net/rpc: correct comment for isExportedOrBuiltinType function
+ 2025-12-30 b28808d838 cmd/go/internal/modindex: fix obvious bug using failed type assertion
+ 2025-12-30 d64add4d60 simd/archsimd: adjust documentations slightly
+ 2025-12-30 1843cfbcd6 runtime/secret: make tests more sturdy
+ 2025-12-30 fd45d70799 all: fix some minor grammatical issues in the comments
+ 2025-12-30 df4e08ac65 test/codegen: fix a tab in comparisons.go to ensure pattern works
+ 2025-12-30 cd668d744f cmd/compile: disable inlining for functions using runtime.deferrangefunc
+ 2025-12-29 06eff0f7c3 simd/archsimd: add tests for Saturate-Concat operations
+ 2025-12-29 110aaf7137 simd/archsimd: add tests for Saturate operations
+ 2025-12-29 22e7b94e7f simd/archsimd: add tests for ExtendLo operations
+ 2025-12-29 76dddce293 simd/archsimd: remove redundant suffix of ExtendLo operations
+ 2025-12-29 6ecdd2fc6e simd/archsimd: add more tests for Convert operations
+ 2025-12-29 e0c99fe285 simd/archsimd: add more tests for Truncate operations
+ 2025-12-29 08369369e5 reflect: document Call/CallSlice panic when v is unexported field
+ 2025-12-29 ca8effbde1 internal/coverage/decodemeta: correct wording in unknown version error
+ 2025-12-29 0b06b68e21 encoding/gob: clarify docs about pointers to zero values not being sent
+ 2025-12-29 9cb3edbfe9 regexp: standardize error message format in find_test.go
+ 2025-12-29 b3ed0627ce tests: improve consistency and clarity of test diagnostics
+ 2025-12-29 3dcb48d298 test: follow got/want convention in uintptrescapes test
+ 2025-12-29 f7b7e94b0a test: clarify log message for surrogate UTF-8 check
+ 2025-12-29 e790d59674 simd/archsimd: add tests for Truncate operations
+ 2025-12-27 f4cec7917c cmd: fix unused errors reported by ineffassign
+ 2025-12-27 ca13fe02c4 simd/archsimd: add more tests for Convert operations
+ 2025-12-27 037c047f2c simd/archsimd: add more tests for Extend operations
+ 2025-12-26 7971fcdf53 test/codegen: tidy tests for bits
+ 2025-12-24 0f620776d7 simd/archsimd: fix "go generate" command
+ 2025-12-24 a5fe8c07ae simd/archsimd: guard test helpers with amd64 tag
+ 2025-12-23 a23d1a4ebe bytes: improve consistency in split test messages
+ 2025-12-23 866e461b96 cmd/go: update pkgsite doc command to v0.0.0-20251223195805-1a3bd3c788fe
+ 2025-12-23 08dc8393d7 time: skip test that will fail with GO111MODULE=off
+ 2025-12-23 43ebed88cc runtime: improve a log message in TestCleanupLost
+ 2025-12-23 81283ad339 runtime: fix nGsyscallNoP accounting
+ 2025-12-23 3e0e1667f6 test/codegen: codify bit related code generation for riscv64
+ 2025-12-23 3faf988f21 errors: add a test verifying join does not flatten errors
+ 2025-12-23 2485a0bc2c cmd/asm/internal/asm: run riscv64 end-to-end tests for each profile
+ 2025-12-23 8254d66eab cmd/asm/internal/asm: abort end to end test if assembly failed
+ 2025-12-23 1b3db48db7 Revert "errors: optimize errors.Join for single unwrappable errors"
+ 2025-12-23 b6b8b2fe6e cmd/compile: handle propagating an out-of-range jump table index
+ 2025-12-22 2cd0371a0a debug/pe: avoid panic in File.ImportedSymbols
+ 2025-12-22 91435be153 runtime: revert entry point on freebsd/arm64
+ 2025-12-22 c1efada1d2 simd/archsimd: correct documentation for pairwise operations
+ 2025-12-22 3d77a0b15e os/exec: second call to Cmd.Start is always an error
+ 2025-12-20 7ecb1f36ac simd/archsimd: add HasAVX2() guards to tests that need them
+ 2025-12-19 70c22e0ad7 simd/archsimd: delete DotProductQuadruple methods for now
+ 2025-12-19 42cda7c1df simd/archsimd: add Grouped for 256- and 512-bit SaturateTo(U)Int16Concat, and fix type
+ 2025-12-19 baa0ae3aaa simd/archsimd: correct type and instruction for SaturateToUint8
+ 2025-12-19 d46c58debb go/doc: link to struct fields in the same package
+ 2025-12-19 25ed6c7f9b cmd/go/internal/doc: update pkgsite version
+ 2025-12-19 4411edf972 simd/archsimd: reword documentation for some operations
+ 2025-12-19 7d9418a19c simd/archsimd: reword documentation of comparison operations
+ 2025-12-18 d00e96d3ae internal/cpu: repair VNNI feature check
+ 2025-12-18 cfc024daeb simd/archsimd: reword documentation for conversion ops
+ 2025-12-17 ad91f5d241 simd/archsimd: reword documentation of shfit operations
+ 2025-12-17 b8c4cc63e7 runtime: keep track of secret allocation size
+ 2025-12-17 8564fede89 cmd/go: remove reference to no longer existing -i flag
+ 2025-12-17 eecdb61eeb crypto: rename fips140v2.0 to fips140v1.26
+ 2025-12-17 05e41225f6 simd/archsimd: reword documentation of As methods
+ 2025-12-17 516699848b runtime/secret: warn users about allocations, loosen guarantees
+ 2025-12-16 8c28ab936a cmd/cgo: don't emit C local if it is not used
+ 2025-12-16 65b71c11d4 crypto/internal/fips140only: test fips140=only mode
+ 2025-12-16 ea1aa76554 go/doc: exclude examples with results
+ 2025-12-16 5046bdf8a6 crypto/tls: reject trailing messages after client/server hello
+ 2025-12-16 3f6eabdf09 cmd/compile: use unsigned constant when folding loads for SIMD ops with constants
+ 2025-12-16 a4b5b92055 cmd/dist: preserve existing GOEXPERIMENTs when running tests with additional experiments
Change-Id: I84ad4ceba344761142b98587c07d186cf2d638ff
143 files changed, 14940 insertions, 5317 deletions
diff --git a/src/archive/tar/reader_test.go b/src/archive/tar/reader_test.go index de3d365304..c7611ca044 100644 --- a/src/archive/tar/reader_test.go +++ b/src/archive/tar/reader_test.go @@ -787,7 +787,7 @@ type readBadSeeker struct{ io.ReadSeeker } func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") } -// TestReadTruncation test the ending condition on various truncated files and +// TestReadTruncation tests the ending condition on various truncated files and // that truncated files are still detected even if the underlying io.Reader // satisfies io.Seeker. func TestReadTruncation(t *testing.T) { diff --git a/src/archive/tar/stat_unix.go b/src/archive/tar/stat_unix.go index f999f56db6..891a1a3b4a 100644 --- a/src/archive/tar/stat_unix.go +++ b/src/archive/tar/stat_unix.go @@ -19,7 +19,7 @@ func init() { sysStat = statUnix } -// userMap and groupMap caches UID and GID lookups for performance reasons. +// userMap and groupMap cache UID and GID lookups for performance reasons. // The downside is that renaming uname or gname by the OS never takes effect. var userMap, groupMap sync.Map // map[int]string diff --git a/src/archive/tar/strconv.go b/src/archive/tar/strconv.go index 217efe9e2e..d3c28a8c4e 100644 --- a/src/archive/tar/strconv.go +++ b/src/archive/tar/strconv.go @@ -312,7 +312,7 @@ func formatPAXRecord(k, v string) (string, error) { // "%d %s=%s\n" % (size, key, value) // // Keys and values should be UTF-8, but the number of bad writers out there -// forces us to be a more liberal. +// forces us to be more liberal. // Thus, we only reject all keys with NUL, and only reject NULs in values // for the PAX version of the USTAR string fields. // The key must not contain an '=' character. 
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 9547ede312..891aef2c8b 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -961,7 +961,7 @@ func TestSplit(t *testing.T) { if tt.n < 0 { b := sliceOfString(Split([]byte(tt.s), []byte(tt.sep))) if !slices.Equal(result, b) { - t.Errorf("Split disagrees withSplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) + t.Errorf("Split disagrees with SplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) } } if len(a) > 0 { @@ -1023,7 +1023,7 @@ func TestSplitAfter(t *testing.T) { if tt.n < 0 { b := sliceOfString(SplitAfter([]byte(tt.s), []byte(tt.sep))) if !slices.Equal(result, b) { - t.Errorf("SplitAfter disagrees withSplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) + t.Errorf("SplitAfter disagrees with SplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a) } } } diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index e53263356d..28dce50d60 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -199,6 +199,11 @@ Diff: } obj.Flushplist(ctxt, pList, nil) + if !ok { + // If we've encountered errors, the output is unlikely to be sane. 
+ t.FailNow() + } + for p := top; p != nil; p = p.Link { if p.As == obj.ATEXT { text = p.From.Sym @@ -486,16 +491,35 @@ func TestPPC64EndToEnd(t *testing.T) { } } -func TestRISCVEndToEnd(t *testing.T) { - testEndToEnd(t, "riscv64", "riscv64") +func testRISCV64AllProfiles(t *testing.T, testFn func(t *testing.T)) { + t.Helper() + + defer func(orig int) { buildcfg.GORISCV64 = orig }(buildcfg.GORISCV64) + + for _, goriscv64 := range []int{20, 22, 23} { + t.Run(fmt.Sprintf("rva%vu64", goriscv64), func(t *testing.T) { + buildcfg.GORISCV64 = goriscv64 + testFn(t) + }) + } +} + +func TestRISCV64EndToEnd(t *testing.T) { + testRISCV64AllProfiles(t, func(t *testing.T) { + testEndToEnd(t, "riscv64", "riscv64") + }) } -func TestRISCVErrors(t *testing.T) { - testErrors(t, "riscv64", "riscv64error") +func TestRISCV64Errors(t *testing.T) { + testRISCV64AllProfiles(t, func(t *testing.T) { + testErrors(t, "riscv64", "riscv64error") + }) } -func TestRISCVValidation(t *testing.T) { - testErrors(t, "riscv64", "riscv64validation") +func TestRISCV64Validation(t *testing.T) { + testRISCV64AllProfiles(t, func(t *testing.T) { + testErrors(t, "riscv64", "riscv64validation") + }) } func TestS390XEndToEnd(t *testing.T) { diff --git a/src/cmd/cgo/internal/test/issue76861.go b/src/cmd/cgo/internal/test/issue76861.go new file mode 100644 index 0000000000..225e2acc3f --- /dev/null +++ b/src/cmd/cgo/internal/test/issue76861.go @@ -0,0 +1,12 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build cgo + +package cgotest + +// Issue 43639: No runtime test needed, make sure package +// cmd/cgo/internal/test/issue76861 compiles without error. 
+ +import _ "cmd/cgo/internal/test/issue76861" diff --git a/src/cmd/cgo/internal/test/issue76861/a.go b/src/cmd/cgo/internal/test/issue76861/a.go new file mode 100644 index 0000000000..18a7bda490 --- /dev/null +++ b/src/cmd/cgo/internal/test/issue76861/a.go @@ -0,0 +1,13 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package issue76861 + +// #cgo CFLAGS: -Wall -Werror +// void issue76861(void) {} +import "C" + +func Issue76861() { + C.issue76861() +} diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go index dc1e5b29e5..ac2ce8fd0d 100644 --- a/src/cmd/cgo/out.go +++ b/src/cmd/cgo/out.go @@ -783,13 +783,13 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) { // We're trying to write a gcc struct that matches gc's layout. // Use packed attribute to force no padding in this struct in case // gcc has different packing requirements. - fmt.Fprintf(fgcc, "\t%s %v *_cgo_a = v;\n", ctype, p.packedAttribute()) - if n.FuncType.Result != nil { - // Save the stack top for use below. - fmt.Fprintf(fgcc, "\tchar *_cgo_stktop = _cgo_topofstack();\n") - } tr := n.FuncType.Result + if (n.Kind != "macro" && len(n.FuncType.Params) > 0) || tr != nil { + fmt.Fprintf(fgcc, "\t%s %v *_cgo_a = v;\n", ctype, p.packedAttribute()) + } if tr != nil { + // Save the stack top for use below. + fmt.Fprintf(fgcc, "\tchar *_cgo_stktop = _cgo_topofstack();\n") fmt.Fprintf(fgcc, "\t__typeof__(_cgo_a->r) _cgo_r;\n") } fmt.Fprintf(fgcc, "\t_cgo_tsan_acquire();\n") @@ -819,7 +819,7 @@ func (p *Package) writeOutputFunc(fgcc *os.File, n *Name) { fmt.Fprintf(fgcc, "\t_cgo_errno = errno;\n") } fmt.Fprintf(fgcc, "\t_cgo_tsan_release();\n") - if n.FuncType.Result != nil { + if tr != nil { // The cgo call may have caused a stack copy (via a callback). // Adjust the return value pointer appropriately. 
fmt.Fprintf(fgcc, "\t_cgo_a = (void*)((char*)_cgo_a + (_cgo_topofstack() - _cgo_stktop));\n") diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index f6deba3ec1..c4d0fd69c6 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. package amd64 @@ -175,7 +175,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSQD128_128, ssa.OpAMD64VPMOVSQD128_256, ssa.OpAMD64VPMOVSQD256, + ssa.OpAMD64VPMOVUSWB128_128, + ssa.OpAMD64VPMOVUSWB128_256, ssa.OpAMD64VPMOVUSWB256, + ssa.OpAMD64VPMOVUSDB128_128, + ssa.OpAMD64VPMOVUSDB128_256, + ssa.OpAMD64VPMOVUSDB128_512, + ssa.OpAMD64VPMOVUSQB128_128, + ssa.OpAMD64VPMOVUSQB128_256, + ssa.OpAMD64VPMOVUSQB128_512, ssa.OpAMD64VPMOVUSDW128_128, ssa.OpAMD64VPMOVUSDW128_256, ssa.OpAMD64VPMOVUSDW256, @@ -242,12 +250,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPADDQ256, ssa.OpAMD64VPADDQ512, ssa.OpAMD64VHADDPS128, - ssa.OpAMD64VHADDPS256, ssa.OpAMD64VHADDPD128, - ssa.OpAMD64VHADDPD256, ssa.OpAMD64VPHADDW128, - ssa.OpAMD64VPHADDW256, ssa.OpAMD64VPHADDD128, + ssa.OpAMD64VHADDPS256, + ssa.OpAMD64VHADDPD256, + ssa.OpAMD64VPHADDW256, ssa.OpAMD64VPHADDD256, ssa.OpAMD64VPHADDSW128, ssa.OpAMD64VPHADDSW256, @@ -512,12 +520,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSUBQ256, ssa.OpAMD64VPSUBQ512, ssa.OpAMD64VHSUBPS128, - ssa.OpAMD64VHSUBPS256, ssa.OpAMD64VHSUBPD128, - ssa.OpAMD64VHSUBPD256, ssa.OpAMD64VPHSUBW128, - ssa.OpAMD64VPHSUBW256, ssa.OpAMD64VPHSUBD128, + ssa.OpAMD64VHSUBPS256, + ssa.OpAMD64VHSUBPD256, + ssa.OpAMD64VPHSUBW256, ssa.OpAMD64VPHSUBD256, 
ssa.OpAMD64VPHSUBSW128, ssa.OpAMD64VPHSUBSW256, @@ -731,12 +739,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128, ssa.OpAMD64VPRORVQMasked256, ssa.OpAMD64VPRORVQMasked512, - ssa.OpAMD64VPACKSSDWMasked128, ssa.OpAMD64VPACKSSDWMasked256, ssa.OpAMD64VPACKSSDWMasked512, - ssa.OpAMD64VPACKUSDWMasked128, + ssa.OpAMD64VPACKSSDWMasked128, ssa.OpAMD64VPACKUSDWMasked256, ssa.OpAMD64VPACKUSDWMasked512, + ssa.OpAMD64VPACKUSDWMasked128, ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked512, @@ -1010,7 +1018,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSQDMasked128_128, ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, + ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPMOVUSDWMasked128_128, ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, @@ -1308,12 +1324,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2Q256, ssa.OpAMD64VPERMI2PD512, ssa.OpAMD64VPERMI2Q512, - ssa.OpAMD64VPDPBUSD128, - ssa.OpAMD64VPDPBUSD256, - ssa.OpAMD64VPDPBUSD512, - ssa.OpAMD64VPDPBUSDS128, - ssa.OpAMD64VPDPBUSDS256, - ssa.OpAMD64VPDPBUSDS512, ssa.OpAMD64VFMADD213PS128, ssa.OpAMD64VFMADD213PS256, ssa.OpAMD64VFMADD213PS512, @@ -1430,12 +1440,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128Merging, ssa.OpAMD64VPMADDUBSWMasked256Merging, ssa.OpAMD64VPMADDUBSWMasked512Merging, - ssa.OpAMD64VPDPBUSDMasked128, - ssa.OpAMD64VPDPBUSDMasked256, - ssa.OpAMD64VPDPBUSDMasked512, - ssa.OpAMD64VPDPBUSDSMasked128, - ssa.OpAMD64VPDPBUSDSMasked256, - ssa.OpAMD64VPDPBUSDSMasked512, 
ssa.OpAMD64VGF2P8MULBMasked128Merging, ssa.OpAMD64VGF2P8MULBMasked256Merging, ssa.OpAMD64VGF2P8MULBMasked512Merging, @@ -1559,12 +1563,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128Merging, ssa.OpAMD64VPRORVQMasked256Merging, ssa.OpAMD64VPRORVQMasked512Merging, - ssa.OpAMD64VPACKSSDWMasked128Merging, ssa.OpAMD64VPACKSSDWMasked256Merging, ssa.OpAMD64VPACKSSDWMasked512Merging, - ssa.OpAMD64VPACKUSDWMasked128Merging, + ssa.OpAMD64VPACKSSDWMasked128Merging, ssa.OpAMD64VPACKUSDWMasked256Merging, ssa.OpAMD64VPACKUSDWMasked512Merging, + ssa.OpAMD64VPACKUSDWMasked128Merging, ssa.OpAMD64VSCALEFPSMasked128Merging, ssa.OpAMD64VSCALEFPSMasked256Merging, ssa.OpAMD64VSCALEFPSMasked512Merging, @@ -1955,8 +1959,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2Q256load, ssa.OpAMD64VPERMI2PD512load, ssa.OpAMD64VPERMI2Q512load, - ssa.OpAMD64VPDPBUSD512load, - ssa.OpAMD64VPDPBUSDS512load, ssa.OpAMD64VFMADD213PS128load, ssa.OpAMD64VFMADD213PS256load, ssa.OpAMD64VFMADD213PS512load, @@ -2004,12 +2006,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPERMI2QMasked256load, ssa.OpAMD64VPERMI2PDMasked512load, ssa.OpAMD64VPERMI2QMasked512load, - ssa.OpAMD64VPDPBUSDMasked128load, - ssa.OpAMD64VPDPBUSDMasked256load, - ssa.OpAMD64VPDPBUSDMasked512load, - ssa.OpAMD64VPDPBUSDSMasked128load, - ssa.OpAMD64VPDPBUSDSMasked256load, - ssa.OpAMD64VPDPBUSDSMasked512load, ssa.OpAMD64VFMADD213PSMasked128load, ssa.OpAMD64VFMADD213PSMasked256load, ssa.OpAMD64VFMADD213PSMasked512load, @@ -2146,12 +2142,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORVQMasked128load, ssa.OpAMD64VPRORVQMasked256load, ssa.OpAMD64VPRORVQMasked512load, - ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPACKSSDWMasked256load, ssa.OpAMD64VPACKSSDWMasked512load, - ssa.OpAMD64VPACKUSDWMasked128load, + ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPACKUSDWMasked256load, 
ssa.OpAMD64VPACKUSDWMasked512load, + ssa.OpAMD64VPACKUSDWMasked128load, ssa.OpAMD64VSCALEFPSMasked128load, ssa.OpAMD64VSCALEFPSMasked256load, ssa.OpAMD64VSCALEFPSMasked512load, @@ -2638,7 +2634,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSQDMasked128_128Merging, ssa.OpAMD64VPMOVSQDMasked128_256Merging, ssa.OpAMD64VPMOVSQDMasked256Merging, + ssa.OpAMD64VPMOVUSWBMasked128_128Merging, + ssa.OpAMD64VPMOVUSWBMasked128_256Merging, ssa.OpAMD64VPMOVUSWBMasked256Merging, + ssa.OpAMD64VPMOVUSDBMasked128_128Merging, + ssa.OpAMD64VPMOVUSDBMasked128_256Merging, + ssa.OpAMD64VPMOVUSDBMasked128_512Merging, + ssa.OpAMD64VPMOVUSQBMasked128_128Merging, + ssa.OpAMD64VPMOVUSQBMasked128_256Merging, + ssa.OpAMD64VPMOVUSQBMasked128_512Merging, ssa.OpAMD64VPMOVUSDWMasked128_128Merging, ssa.OpAMD64VPMOVUSDWMasked128_256Merging, ssa.OpAMD64VPMOVUSDWMasked256Merging, @@ -3021,18 +3025,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMADDUBSWMasked128, ssa.OpAMD64VPMADDUBSWMasked256, ssa.OpAMD64VPMADDUBSWMasked512, - ssa.OpAMD64VPDPBUSDMasked128, - ssa.OpAMD64VPDPBUSDMasked128load, - ssa.OpAMD64VPDPBUSDMasked256, - ssa.OpAMD64VPDPBUSDMasked256load, - ssa.OpAMD64VPDPBUSDMasked512, - ssa.OpAMD64VPDPBUSDMasked512load, - ssa.OpAMD64VPDPBUSDSMasked128, - ssa.OpAMD64VPDPBUSDSMasked128load, - ssa.OpAMD64VPDPBUSDSMasked256, - ssa.OpAMD64VPDPBUSDSMasked256load, - ssa.OpAMD64VPDPBUSDSMasked512, - ssa.OpAMD64VPDPBUSDSMasked512load, ssa.OpAMD64VEXPANDPSMasked128, ssa.OpAMD64VEXPANDPSMasked256, ssa.OpAMD64VEXPANDPSMasked512, @@ -3415,12 +3407,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSQBMasked128_128, ssa.OpAMD64VPMOVSQBMasked128_256, ssa.OpAMD64VPMOVSQBMasked128_512, - ssa.OpAMD64VPACKSSDWMasked128, - ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPACKSSDWMasked256, ssa.OpAMD64VPACKSSDWMasked256load, ssa.OpAMD64VPACKSSDWMasked512, ssa.OpAMD64VPACKSSDWMasked512load, + ssa.OpAMD64VPACKSSDWMasked128, + 
ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPMOVSDWMasked128_128, ssa.OpAMD64VPMOVSDWMasked128_256, ssa.OpAMD64VPMOVSDWMasked256, @@ -3430,13 +3422,21 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSQDMasked128_128, ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPACKUSDWMasked128, - ssa.OpAMD64VPACKUSDWMasked128load, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, + ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPACKUSDWMasked256, ssa.OpAMD64VPACKUSDWMasked256load, ssa.OpAMD64VPACKUSDWMasked512, ssa.OpAMD64VPACKUSDWMasked512load, + ssa.OpAMD64VPACKUSDWMasked128, + ssa.OpAMD64VPACKUSDWMasked128load, ssa.OpAMD64VPMOVUSDWMasked128_128, ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 5ddcb84c59..e9a566d759 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -1845,7 +1845,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpAMD64VPMOVVec32x16ToM, ssa.OpAMD64VPMOVVec64x2ToM, ssa.OpAMD64VPMOVVec64x4ToM, - ssa.OpAMD64VPMOVVec64x8ToM: + ssa.OpAMD64VPMOVVec64x8ToM, + ssa.OpAMD64VPMOVMSKB128, + ssa.OpAMD64VPMOVMSKB256, + ssa.OpAMD64VMOVMSKPS128, + ssa.OpAMD64VMOVMSKPS256, + ssa.OpAMD64VMOVMSKPD128, + ssa.OpAMD64VMOVMSKPD256: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = simdReg(v.Args[0]) diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go index 33f9c325c3..4fa9cf07fb 100644 --- a/src/cmd/compile/internal/inline/inl.go +++ b/src/cmd/compile/internal/inline/inl.go @@ -516,6 +516,9 @@ opSwitch: break opSwitch case "panicrangestate": cheap = true + case 
"deferrangefunc": + v.reason = "defer call in range func" + return true } } } diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index 353d272179..b49e85b53c 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -1679,21 +1679,21 @@ (Cvt8toMask64x8 <t> x) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x)) // masks to integers -(CvtMask8x16to16 <t> x) => (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x)) -(CvtMask8x32to32 <t> x) => (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x)) -(CvtMask8x64to64 <t> x) => (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x)) +(CvtMask8x16to16 ...) => (VPMOVMSKB128 ...) +(CvtMask8x32to32 ...) => (VPMOVMSKB256 ...) +(CvtMask8x64to64 x) => (KMOVQi (VPMOVVec8x64ToM <types.TypeMask> x)) -(CvtMask16x8to8 <t> x) => (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x)) -(CvtMask16x16to16 <t> x) => (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x)) -(CvtMask16x32to32 <t> x) => (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x)) +(CvtMask16x8to8 x) => (KMOVBi (VPMOVVec16x8ToM <types.TypeMask> x)) +(CvtMask16x16to16 x) => (KMOVWi (VPMOVVec16x16ToM <types.TypeMask> x)) +(CvtMask16x32to32 x) => (KMOVDi (VPMOVVec16x32ToM <types.TypeMask> x)) -(CvtMask32x4to8 <t> x) => (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x)) -(CvtMask32x8to8 <t> x) => (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x)) -(CvtMask32x16to16 <t> x) => (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x)) +(CvtMask32x4to8 ...) => (VMOVMSKPS128 ...) +(CvtMask32x8to8 ...) => (VMOVMSKPS256 ...) +(CvtMask32x16to16 x) => (KMOVWi (VPMOVVec32x16ToM <types.TypeMask> x)) -(CvtMask64x2to8 <t> x) => (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x)) -(CvtMask64x4to8 <t> x) => (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x)) -(CvtMask64x8to8 <t> x) => (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x)) +(CvtMask64x2to8 ...) => (VMOVMSKPD128 ...) +(CvtMask64x4to8 ...) 
=> (VMOVMSKPD256 ...) +(CvtMask64x8to8 x) => (KMOVBi (VPMOVVec64x8ToM <types.TypeMask> x)) // optimizations (MOVBstore [off] {sym} ptr (KMOVBi mask) mem) => (KMOVBstore [off] {sym} ptr mask mem) @@ -1730,6 +1730,13 @@ // Misc (IsZeroVec x) => (SETEQ (VPTEST x x)) +(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x) +(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x) +(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) +(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x) +(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x) +(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) + // SIMD vector K-masked loads and stores (LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem) @@ -1818,10 +1825,10 @@ (EQ (VPTEST x:(VPANDN(128|256) j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order (EQ (VPTEST x:(VPANDN(D|Q)512 j k) y) yes no) && x == y && x.Uses == 2 => (ULT (VPTEST k j) yes no) // AndNot has swapped its operand order -// DotProductQuadruple optimizations -(VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z) => (VPDPBUSD128 <t> z x y) -(VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z) => (VPDPBUSD256 <t> z x y) -(VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z) => (VPDPBUSD512 <t> z x y) -(VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z) => (VPDPBUSDS128 <t> z x y) -(VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z) => (VPDPBUSDS256 <t> z x y) -(VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z) => (VPDPBUSDS512 <t> z x y)
\ No newline at end of file +// optimize x.IsNaN().Or(y.IsNaN()) +(VPOR128 (VCMPP(S|D)128 [3] x x) (VCMPP(S|D)128 [3] y y)) => (VCMPP(S|D)128 [3] x y) +(VPOR256 (VCMPP(S|D)256 [3] x x) (VCMPP(S|D)256 [3] y y)) => (VCMPP(S|D)256 [3] x y) +(VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y))) => + (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) +(VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) => + (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index 2fb4fdfc96..b13eb5aa21 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -1368,6 +1368,7 @@ func init() { {name: "VPMASK64load512", argLength: 3, reg: vloadk, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0+auxint+aux, arg1=k mask, arg2 = mem {name: "VPMASK64store512", argLength: 4, reg: vstorek, asm: "VMOVDQU64", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"}, // store, *(arg0+auxint+aux) = arg2, arg1=k mask, arg3 = mem + // AVX512 moves between int-vector and mask registers {name: "VPMOVMToVec8x16", argLength: 1, reg: kv, asm: "VPMOVM2B"}, {name: "VPMOVMToVec8x32", argLength: 1, reg: kv, asm: "VPMOVM2B"}, {name: "VPMOVMToVec8x64", argLength: 1, reg: kw, asm: "VPMOVM2B"}, @@ -1400,6 +1401,14 @@ func init() { {name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"}, {name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"}, + // AVX1/2 moves from int-vector to bitmask (extracting sign bits) + {name: "VPMOVMSKB128", argLength: 1, reg: vgp, asm: "VPMOVMSKB"}, + {name: "VPMOVMSKB256", argLength: 1, reg: vgp, asm: "VPMOVMSKB"}, + {name: "VMOVMSKPS128", argLength: 1, reg: vgp, asm: "VMOVMSKPS"}, + {name: "VMOVMSKPS256", argLength: 1, reg: vgp, asm: "VMOVMSKPS"}, + {name: "VMOVMSKPD128", argLength: 1, reg: vgp, asm: "VMOVMSKPD"}, + 
{name: "VMOVMSKPD256", argLength: 1, reg: vgp, asm: "VMOVMSKPD"}, + // X15 is the zero register up to 128-bit. For larger values, we zero it on the fly. {name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true}, {name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"}, diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go index 8637133e5f..85bde1aab2 100644 --- a/src/cmd/compile/internal/ssa/_gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go @@ -715,6 +715,14 @@ var genericOps = []opData{ // Returns true if arg0 is all zero. {name: "IsZeroVec", argLength: 1}, + + // Returns a mask indicating whether arg0's elements are NaN. + {name: "IsNaNFloat32x4", argLength: 1}, + {name: "IsNaNFloat32x8", argLength: 1}, + {name: "IsNaNFloat32x16", argLength: 1}, + {name: "IsNaNFloat64x2", argLength: 1}, + {name: "IsNaNFloat64x4", argLength: 1}, + {name: "IsNaNFloat64x8", argLength: 1}, } // kind controls successors implicit exit diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 649940497c..5c83f39a1f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. (AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...) (AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...) @@ -57,19 +57,19 @@ (AddUint64x4 ...) => (VPADDQ256 ...) (AddUint64x8 ...) => (VPADDQ512 ...) (AddPairsFloat32x4 ...) => (VHADDPS128 ...) -(AddPairsFloat32x8 ...) => (VHADDPS256 ...) (AddPairsFloat64x2 ...) => (VHADDPD128 ...) -(AddPairsFloat64x4 ...) => (VHADDPD256 ...) (AddPairsInt16x8 ...) 
=> (VPHADDW128 ...) -(AddPairsInt16x16 ...) => (VPHADDW256 ...) (AddPairsInt32x4 ...) => (VPHADDD128 ...) -(AddPairsInt32x8 ...) => (VPHADDD256 ...) (AddPairsUint16x8 ...) => (VPHADDW128 ...) -(AddPairsUint16x16 ...) => (VPHADDW256 ...) (AddPairsUint32x4 ...) => (VPHADDD128 ...) -(AddPairsUint32x8 ...) => (VPHADDD256 ...) +(AddPairsGroupedFloat32x8 ...) => (VHADDPS256 ...) +(AddPairsGroupedFloat64x4 ...) => (VHADDPD256 ...) +(AddPairsGroupedInt16x16 ...) => (VPHADDW256 ...) +(AddPairsGroupedInt32x8 ...) => (VPHADDD256 ...) +(AddPairsGroupedUint16x16 ...) => (VPHADDW256 ...) +(AddPairsGroupedUint32x8 ...) => (VPHADDD256 ...) (AddPairsSaturatedInt16x8 ...) => (VPHADDSW128 ...) -(AddPairsSaturatedInt16x16 ...) => (VPHADDSW256 ...) +(AddPairsSaturatedGroupedInt16x16 ...) => (VPHADDSW256 ...) (AddSaturatedInt8x16 ...) => (VPADDSB128 ...) (AddSaturatedInt8x32 ...) => (VPADDSB256 ...) (AddSaturatedInt8x64 ...) => (VPADDSB512 ...) @@ -316,12 +316,6 @@ (DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...) (DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...) (DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...) -(DotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...) -(DotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...) -(DotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...) -(DotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...) -(DotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...) -(DotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) @@ -382,26 +376,26 @@ (ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask)) (ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask)) (ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask)) -(ExtendLo2ToInt64x2Int8x16 ...) 
=> (VPMOVSXBQ128 ...) -(ExtendLo2ToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...) -(ExtendLo2ToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) -(ExtendLo2ToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...) -(ExtendLo2ToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...) -(ExtendLo2ToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...) -(ExtendLo4ToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) -(ExtendLo4ToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) -(ExtendLo4ToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) -(ExtendLo4ToInt64x4Int16x8 ...) => (VPMOVSXWQ256 ...) -(ExtendLo4ToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) -(ExtendLo4ToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) -(ExtendLo4ToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...) -(ExtendLo4ToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...) -(ExtendLo8ToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...) -(ExtendLo8ToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...) -(ExtendLo8ToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) -(ExtendLo8ToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...) -(ExtendLo8ToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...) -(ExtendLo8ToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...) +(ExtendLo2ToInt64Int8x16 ...) => (VPMOVSXBQ128 ...) +(ExtendLo2ToInt64Int16x8 ...) => (VPMOVSXWQ128 ...) +(ExtendLo2ToInt64Int32x4 ...) => (VPMOVSXDQ128 ...) +(ExtendLo2ToUint64Uint8x16 ...) => (VPMOVZXBQ128 ...) +(ExtendLo2ToUint64Uint16x8 ...) => (VPMOVZXWQ128 ...) +(ExtendLo2ToUint64Uint32x4 ...) => (VPMOVZXDQ128 ...) +(ExtendLo4ToInt32Int8x16 ...) => (VPMOVSXBD128 ...) +(ExtendLo4ToInt32Int16x8 ...) => (VPMOVSXWD128 ...) +(ExtendLo4ToInt64Int8x16 ...) => (VPMOVSXBQ256 ...) +(ExtendLo4ToInt64Int16x8 ...) => (VPMOVSXWQ256 ...) +(ExtendLo4ToUint32Uint8x16 ...) => (VPMOVZXBD128 ...) +(ExtendLo4ToUint32Uint16x8 ...) => (VPMOVZXWD128 ...) +(ExtendLo4ToUint64Uint8x16 ...) => (VPMOVZXBQ256 ...) +(ExtendLo4ToUint64Uint16x8 ...) => (VPMOVZXWQ256 ...) +(ExtendLo8ToInt16Int8x16 ...) => (VPMOVSXBW128 ...) +(ExtendLo8ToInt32Int8x16 ...) => (VPMOVSXBD256 ...) +(ExtendLo8ToInt64Int8x16 ...) 
=> (VPMOVSXBQ512 ...) +(ExtendLo8ToUint16Uint8x16 ...) => (VPMOVZXBW128 ...) +(ExtendLo8ToUint32Uint8x16 ...) => (VPMOVZXBD256 ...) +(ExtendLo8ToUint64Uint8x16 ...) => (VPMOVZXBQ512 ...) (ExtendToInt16Int8x16 ...) => (VPMOVSXBW256 ...) (ExtendToInt16Int8x32 ...) => (VPMOVSXBW512 ...) (ExtendToInt32Int8x16 ...) => (VPMOVSXBD512 ...) @@ -565,12 +559,6 @@ (InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...) (InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...) (InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...) -(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) -(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) -(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) -(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) -(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) -(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) (LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...) (LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...) (LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...) @@ -914,29 +902,29 @@ (SaturateToInt16Int64x4 ...) => (VPMOVSQW128_256 ...) (SaturateToInt16Int64x8 ...) => (VPMOVSQW128_512 ...) (SaturateToInt16ConcatInt32x4 ...) => (VPACKSSDW128 ...) -(SaturateToInt16ConcatInt32x8 ...) => (VPACKSSDW256 ...) -(SaturateToInt16ConcatInt32x16 ...) => (VPACKSSDW512 ...) +(SaturateToInt16ConcatGroupedInt32x8 ...) => (VPACKSSDW256 ...) +(SaturateToInt16ConcatGroupedInt32x16 ...) => (VPACKSSDW512 ...) (SaturateToInt32Int64x2 ...) => (VPMOVSQD128_128 ...) (SaturateToInt32Int64x4 ...) => (VPMOVSQD128_256 ...) (SaturateToInt32Int64x8 ...) => (VPMOVSQD256 ...) -(SaturateToUint8Int16x8 ...) => (VPMOVSWB128_128 ...) -(SaturateToUint8Int16x16 ...) => (VPMOVSWB128_256 ...) -(SaturateToUint8Int32x4 ...) => (VPMOVSDB128_128 ...) -(SaturateToUint8Int32x8 ...) => (VPMOVSDB128_256 ...) -(SaturateToUint8Int32x16 ...) => (VPMOVSDB128_512 ...) -(SaturateToUint8Int64x2 ...) => (VPMOVSQB128_128 ...) -(SaturateToUint8Int64x4 ...) => (VPMOVSQB128_256 ...) -(SaturateToUint8Int64x8 ...) 
=> (VPMOVSQB128_512 ...) +(SaturateToUint8Uint16x8 ...) => (VPMOVUSWB128_128 ...) +(SaturateToUint8Uint16x16 ...) => (VPMOVUSWB128_256 ...) (SaturateToUint8Uint16x32 ...) => (VPMOVUSWB256 ...) +(SaturateToUint8Uint32x4 ...) => (VPMOVUSDB128_128 ...) +(SaturateToUint8Uint32x8 ...) => (VPMOVUSDB128_256 ...) +(SaturateToUint8Uint32x16 ...) => (VPMOVUSDB128_512 ...) +(SaturateToUint8Uint64x2 ...) => (VPMOVUSQB128_128 ...) +(SaturateToUint8Uint64x4 ...) => (VPMOVUSQB128_256 ...) +(SaturateToUint8Uint64x8 ...) => (VPMOVUSQB128_512 ...) (SaturateToUint16Uint32x4 ...) => (VPMOVUSDW128_128 ...) (SaturateToUint16Uint32x8 ...) => (VPMOVUSDW128_256 ...) (SaturateToUint16Uint32x16 ...) => (VPMOVUSDW256 ...) (SaturateToUint16Uint64x2 ...) => (VPMOVUSQW128_128 ...) (SaturateToUint16Uint64x4 ...) => (VPMOVUSQW128_256 ...) (SaturateToUint16Uint64x8 ...) => (VPMOVUSQW128_512 ...) -(SaturateToUint16ConcatUint32x4 ...) => (VPACKUSDW128 ...) -(SaturateToUint16ConcatUint32x8 ...) => (VPACKUSDW256 ...) -(SaturateToUint16ConcatUint32x16 ...) => (VPACKUSDW512 ...) +(SaturateToUint16ConcatInt32x4 ...) => (VPACKUSDW128 ...) +(SaturateToUint16ConcatGroupedInt32x8 ...) => (VPACKUSDW256 ...) +(SaturateToUint16ConcatGroupedInt32x16 ...) => (VPACKUSDW512 ...) (SaturateToUint32Uint64x2 ...) => (VPMOVUSQD128_128 ...) (SaturateToUint32Uint64x4 ...) => (VPMOVUSQD128_256 ...) (SaturateToUint32Uint64x8 ...) => (VPMOVUSQD256 ...) @@ -1223,19 +1211,19 @@ (SubUint64x4 ...) => (VPSUBQ256 ...) (SubUint64x8 ...) => (VPSUBQ512 ...) (SubPairsFloat32x4 ...) => (VHSUBPS128 ...) -(SubPairsFloat32x8 ...) => (VHSUBPS256 ...) (SubPairsFloat64x2 ...) => (VHSUBPD128 ...) -(SubPairsFloat64x4 ...) => (VHSUBPD256 ...) (SubPairsInt16x8 ...) => (VPHSUBW128 ...) -(SubPairsInt16x16 ...) => (VPHSUBW256 ...) (SubPairsInt32x4 ...) => (VPHSUBD128 ...) -(SubPairsInt32x8 ...) => (VPHSUBD256 ...) (SubPairsUint16x8 ...) => (VPHSUBW128 ...) -(SubPairsUint16x16 ...) => (VPHSUBW256 ...) (SubPairsUint32x4 ...) => (VPHSUBD128 ...) 
-(SubPairsUint32x8 ...) => (VPHSUBD256 ...) +(SubPairsGroupedFloat32x8 ...) => (VHSUBPS256 ...) +(SubPairsGroupedFloat64x4 ...) => (VHSUBPD256 ...) +(SubPairsGroupedInt16x16 ...) => (VPHSUBW256 ...) +(SubPairsGroupedInt32x8 ...) => (VPHSUBD256 ...) +(SubPairsGroupedUint16x16 ...) => (VPHSUBW256 ...) +(SubPairsGroupedUint32x8 ...) => (VPHSUBD256 ...) (SubPairsSaturatedInt16x8 ...) => (VPHSUBSW128 ...) -(SubPairsSaturatedInt16x16 ...) => (VPHSUBSW256 ...) +(SubPairsSaturatedGroupedInt16x16 ...) => (VPHSUBSW256 ...) (SubSaturatedInt8x16 ...) => (VPSUBSB128 ...) (SubSaturatedInt8x32 ...) => (VPSUBSB256 ...) (SubSaturatedInt8x64 ...) => (VPSUBSB512 ...) @@ -1547,12 +1535,6 @@ (VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) => (VPMADDUBSWMasked128 x y mask) (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) => (VPMADDUBSWMasked256 x y mask) (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512 x y mask) -(VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) => (VPDPBUSDMasked128 x y z mask) -(VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) => (VPDPBUSDMasked256 x y z mask) -(VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask) -(VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) => (VPDPBUSDSMasked128 x y z mask) -(VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) => (VPDPBUSDSMasked256 x y z mask) -(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask) (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) => (VPMOVSXBQMasked128 x mask) (VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) => (VPMOVSXWQMasked128 x mask) (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) @@ -1775,9 +1757,9 @@ (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask) (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask) (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask) -(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) (VMOVDQU32Masked256 
(VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask) (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask) +(VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask) (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask) @@ -1787,10 +1769,18 @@ (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask) (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask) +(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) +(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask) (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask) (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask) +(VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask) (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask) @@ 
-2018,6 +2008,7 @@ (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask) @@ -2071,6 +2062,7 @@ (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask) (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) @@ -2235,9 +2227,12 @@ (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> 
mask)) (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) @@ -2396,9 +2391,12 @@ (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst 
x (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) @@ -2511,30 +2509,30 @@ (VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem) (VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem) (VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem) -(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => 
(VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) 
=> (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked128load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked512load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) (VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem) (VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem) (VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem) @@ -2655,54 +2653,46 @@ (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem) (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem) (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem) -(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem) -(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem) -(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem) -(VPDPBUSDMasked512 
x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem) -(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem) -(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem) -(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem) -(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem) (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem) (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem) -(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) 
&& canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) 
-(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => 
(VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x 
ptr mask mem) +(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && 
clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQBMasked128 [c] x 
l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) (VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem) (VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem) -(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPCMPQ512 [c] x 
l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem) (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem) (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem) @@ -2883,30 +2873,30 @@ (VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked128load {sym} [off] ptr mask mem) (VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked256load {sym} [off] ptr mask mem) (VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked512load {sym} [off] ptr mask mem) -(VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD256load {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr 
mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD128load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORD128 [c] l:(VMOVDQUload128 {sym} 
[off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked512load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mask mem) (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD128load {sym} [off] x ptr mem) (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD256load {sym} [off] x ptr mem) (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD512load {sym} [off] x ptr mem) @@ -2932,13 +2922,13 @@ (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem) (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem) (VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem) -(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) (VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) (VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) +(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) (VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem) -(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) (VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask 
mem) (VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) +(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) (VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem) (VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem) (VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem) @@ -2951,30 +2941,30 @@ (VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem) (VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem) (VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem) -(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => 
(VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} 
[makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) -(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} 
[makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} 
[makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) +(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem) 
(VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem) (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} [off] x y ptr mem) @@ -3059,41 +3049,41 @@ (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem) (VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) (VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) -(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) -(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) 
-(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) -(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) 
mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) -(VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) -(VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) -(VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) -(VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) -(VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) -(VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) +(VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && 
canMergeLoad(v, l) && clobber(l) => (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) +(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} 
[off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) +(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLQMasked128const [c] l:(VMOVDQUload128 
{sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) +(VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) +(VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => 
(VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) +(VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) +(VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) +(VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) +(VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index f38d24fde7..648e372fb4 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main @@ -452,18 +452,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPCOMPRESSWMasked128", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCOMPRESSWMasked256", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPDPBUSD128", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSD256", argLength: 3, reg: v31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSD512", argLength: 3, reg: w31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", 
resultInArg0: true}, {name: "VPDPWSSD128", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPWSSD256", argLength: 3, reg: v31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPWSSD512", argLength: 3, reg: w31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, @@ -780,12 +768,24 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: 
"Vec256", resultInArg0: false}, {name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -798,7 +798,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQWMasked128_512", argLength: 2, 
reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -1698,14 +1702,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPCMPEQQ512load", argLength: 3, reg: w2kload, asm: "VPCMPEQQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPCMPGTD512load", argLength: 3, reg: w2kload, asm: "VPCMPGTD", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, {name: "VPCMPGTQ512load", argLength: 3, reg: w2kload, asm: "VPCMPGTQ", commutative: false, typ: "Mask", aux: "SymOff", symEffect: "Read", resultInArg0: false}, - {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", 
resultInArg0: true}, - {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDS512load", argLength: 4, reg: w31load, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, - {name: "VPDPBUSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, {name: "VPDPWSSD512load", argLength: 4, reg: w31load, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true}, {name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true}, {name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true}, @@ -2382,15 +2378,23 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", 
commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: 
true}, {name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index a68d8c4122..889ab0d84f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package main @@ -48,19 +48,19 @@ func simdGenericOps() []opData { {name: "AddInt64x4", argLength: 2, commutative: true}, {name: "AddInt64x8", argLength: 2, commutative: true}, {name: "AddPairsFloat32x4", argLength: 2, commutative: false}, - {name: "AddPairsFloat32x8", argLength: 2, commutative: false}, {name: "AddPairsFloat64x2", argLength: 2, commutative: false}, - {name: "AddPairsFloat64x4", argLength: 2, commutative: false}, + {name: "AddPairsGroupedFloat32x8", argLength: 2, commutative: false}, + {name: "AddPairsGroupedFloat64x4", argLength: 2, commutative: false}, + {name: "AddPairsGroupedInt16x16", argLength: 2, commutative: false}, + {name: "AddPairsGroupedInt32x8", argLength: 2, commutative: false}, + {name: "AddPairsGroupedUint16x16", argLength: 2, commutative: false}, + {name: "AddPairsGroupedUint32x8", argLength: 2, commutative: false}, {name: "AddPairsInt16x8", argLength: 2, commutative: false}, - {name: "AddPairsInt16x16", argLength: 2, commutative: false}, {name: "AddPairsInt32x4", argLength: 2, commutative: false}, - {name: "AddPairsInt32x8", argLength: 2, commutative: false}, + {name: "AddPairsSaturatedGroupedInt16x16", argLength: 2, commutative: false}, {name: "AddPairsSaturatedInt16x8", argLength: 2, commutative: false}, - {name: "AddPairsSaturatedInt16x16", argLength: 2, commutative: false}, {name: "AddPairsUint16x8", argLength: 2, commutative: false}, - {name: "AddPairsUint16x16", argLength: 2, commutative: false}, {name: "AddPairsUint32x4", argLength: 2, commutative: false}, - {name: "AddPairsUint32x8", argLength: 2, commutative: false}, {name: "AddSaturatedInt8x16", argLength: 2, commutative: true}, {name: "AddSaturatedInt8x32", argLength: 2, commutative: true}, {name: "AddSaturatedInt8x64", argLength: 2, commutative: true}, @@ -304,12 +304,6 @@ func simdGenericOps() []opData { {name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false}, {name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false}, {name: 
"DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false}, - {name: "DotProductQuadrupleInt32x4", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleInt32x8", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleInt32x16", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false}, - {name: "DotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false}, {name: "EqualFloat32x4", argLength: 2, commutative: true}, {name: "EqualFloat32x8", argLength: 2, commutative: true}, {name: "EqualFloat32x16", argLength: 2, commutative: true}, @@ -370,26 +364,26 @@ func simdGenericOps() []opData { {name: "ExpandUint64x2", argLength: 2, commutative: false}, {name: "ExpandUint64x4", argLength: 2, commutative: false}, {name: "ExpandUint64x8", argLength: 2, commutative: false}, - {name: "ExtendLo2ToInt64x2Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo2ToInt64x2Int16x8", argLength: 1, commutative: false}, - {name: "ExtendLo2ToInt64x2Int32x4", argLength: 1, commutative: false}, - {name: "ExtendLo2ToUint64x2Uint8x16", argLength: 1, commutative: false}, - {name: "ExtendLo2ToUint64x2Uint16x8", argLength: 1, commutative: false}, - {name: "ExtendLo2ToUint64x2Uint32x4", argLength: 1, commutative: false}, - {name: "ExtendLo4ToInt32x4Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo4ToInt32x4Int16x8", argLength: 1, commutative: false}, - {name: "ExtendLo4ToInt64x4Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo4ToInt64x4Int16x8", argLength: 1, commutative: false}, - {name: "ExtendLo4ToUint32x4Uint8x16", argLength: 1, commutative: false}, - {name: "ExtendLo4ToUint32x4Uint16x8", argLength: 1, commutative: false}, - {name: "ExtendLo4ToUint64x4Uint8x16", argLength: 1, commutative: false}, - {name: "ExtendLo4ToUint64x4Uint16x8", argLength: 1, commutative: false}, - 
{name: "ExtendLo8ToInt16x8Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo8ToInt32x8Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo8ToInt64x8Int8x16", argLength: 1, commutative: false}, - {name: "ExtendLo8ToUint16x8Uint8x16", argLength: 1, commutative: false}, - {name: "ExtendLo8ToUint32x8Uint8x16", argLength: 1, commutative: false}, - {name: "ExtendLo8ToUint64x8Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo2ToInt64Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo2ToInt64Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo2ToInt64Int32x4", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo2ToUint64Uint32x4", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt32Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt32Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt64Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToInt64Int16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint32Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint32Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint64Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo4ToUint64Uint16x8", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt16Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt32Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToInt64Int8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint16Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint32Uint8x16", argLength: 1, commutative: false}, + {name: "ExtendLo8ToUint64Uint8x16", argLength: 1, commutative: false}, {name: "ExtendToInt16Int8x16", argLength: 1, commutative: false}, {name: "ExtendToInt16Int8x32", argLength: 1, 
commutative: false}, {name: "ExtendToInt32Int8x16", argLength: 1, commutative: false}, @@ -525,12 +519,6 @@ func simdGenericOps() []opData { {name: "InterleaveLoUint16x8", argLength: 2, commutative: false}, {name: "InterleaveLoUint32x4", argLength: 2, commutative: false}, {name: "InterleaveLoUint64x2", argLength: 2, commutative: false}, - {name: "IsNanFloat32x4", argLength: 2, commutative: true}, - {name: "IsNanFloat32x8", argLength: 2, commutative: true}, - {name: "IsNanFloat32x16", argLength: 2, commutative: true}, - {name: "IsNanFloat64x2", argLength: 2, commutative: true}, - {name: "IsNanFloat64x4", argLength: 2, commutative: true}, - {name: "IsNanFloat64x8", argLength: 2, commutative: true}, {name: "LeadingZerosInt32x4", argLength: 1, commutative: false}, {name: "LeadingZerosInt32x8", argLength: 1, commutative: false}, {name: "LeadingZerosInt32x16", argLength: 1, commutative: false}, @@ -830,9 +818,9 @@ func simdGenericOps() []opData { {name: "SaturateToInt8Int64x2", argLength: 1, commutative: false}, {name: "SaturateToInt8Int64x4", argLength: 1, commutative: false}, {name: "SaturateToInt8Int64x8", argLength: 1, commutative: false}, + {name: "SaturateToInt16ConcatGroupedInt32x8", argLength: 2, commutative: false}, + {name: "SaturateToInt16ConcatGroupedInt32x16", argLength: 2, commutative: false}, {name: "SaturateToInt16ConcatInt32x4", argLength: 2, commutative: false}, - {name: "SaturateToInt16ConcatInt32x8", argLength: 2, commutative: false}, - {name: "SaturateToInt16ConcatInt32x16", argLength: 2, commutative: false}, {name: "SaturateToInt16Int32x4", argLength: 1, commutative: false}, {name: "SaturateToInt16Int32x8", argLength: 1, commutative: false}, {name: "SaturateToInt16Int32x16", argLength: 1, commutative: false}, @@ -842,18 +830,18 @@ func simdGenericOps() []opData { {name: "SaturateToInt32Int64x2", argLength: 1, commutative: false}, {name: "SaturateToInt32Int64x4", argLength: 1, commutative: false}, {name: "SaturateToInt32Int64x8", argLength: 1, 
commutative: false}, - {name: "SaturateToUint8Int16x8", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int16x16", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int32x4", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int32x8", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int32x16", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int64x2", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int64x4", argLength: 1, commutative: false}, - {name: "SaturateToUint8Int64x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint16x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint16x16", argLength: 1, commutative: false}, {name: "SaturateToUint8Uint16x32", argLength: 1, commutative: false}, - {name: "SaturateToUint16ConcatUint32x4", argLength: 2, commutative: false}, - {name: "SaturateToUint16ConcatUint32x8", argLength: 2, commutative: false}, - {name: "SaturateToUint16ConcatUint32x16", argLength: 2, commutative: false}, + {name: "SaturateToUint8Uint32x4", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint32x8", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint32x16", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint64x2", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint64x4", argLength: 1, commutative: false}, + {name: "SaturateToUint8Uint64x8", argLength: 1, commutative: false}, + {name: "SaturateToUint16ConcatGroupedInt32x8", argLength: 2, commutative: false}, + {name: "SaturateToUint16ConcatGroupedInt32x16", argLength: 2, commutative: false}, + {name: "SaturateToUint16ConcatInt32x4", argLength: 2, commutative: false}, {name: "SaturateToUint16Uint32x4", argLength: 1, commutative: false}, {name: "SaturateToUint16Uint32x8", argLength: 1, commutative: false}, {name: "SaturateToUint16Uint32x16", argLength: 1, commutative: false}, @@ -1042,19 +1030,19 @@ func simdGenericOps() []opData { {name: "SubInt64x4", 
argLength: 2, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, {name: "SubPairsFloat32x4", argLength: 2, commutative: false}, - {name: "SubPairsFloat32x8", argLength: 2, commutative: false}, {name: "SubPairsFloat64x2", argLength: 2, commutative: false}, - {name: "SubPairsFloat64x4", argLength: 2, commutative: false}, + {name: "SubPairsGroupedFloat32x8", argLength: 2, commutative: false}, + {name: "SubPairsGroupedFloat64x4", argLength: 2, commutative: false}, + {name: "SubPairsGroupedInt16x16", argLength: 2, commutative: false}, + {name: "SubPairsGroupedInt32x8", argLength: 2, commutative: false}, + {name: "SubPairsGroupedUint16x16", argLength: 2, commutative: false}, + {name: "SubPairsGroupedUint32x8", argLength: 2, commutative: false}, {name: "SubPairsInt16x8", argLength: 2, commutative: false}, - {name: "SubPairsInt16x16", argLength: 2, commutative: false}, {name: "SubPairsInt32x4", argLength: 2, commutative: false}, - {name: "SubPairsInt32x8", argLength: 2, commutative: false}, + {name: "SubPairsSaturatedGroupedInt16x16", argLength: 2, commutative: false}, {name: "SubPairsSaturatedInt16x8", argLength: 2, commutative: false}, - {name: "SubPairsSaturatedInt16x16", argLength: 2, commutative: false}, {name: "SubPairsUint16x8", argLength: 2, commutative: false}, - {name: "SubPairsUint16x16", argLength: 2, commutative: false}, {name: "SubPairsUint32x4", argLength: 2, commutative: false}, - {name: "SubPairsUint32x8", argLength: 2, commutative: false}, {name: "SubSaturatedInt8x16", argLength: 2, commutative: false}, {name: "SubSaturatedInt8x32", argLength: 2, commutative: false}, {name: "SubSaturatedInt8x64", argLength: 2, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 00d581ec9a..7b70dc2686 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1214,6 +1214,12 @@ const ( OpAMD64VPMOVVec64x2ToM OpAMD64VPMOVVec64x4ToM 
OpAMD64VPMOVVec64x8ToM + OpAMD64VPMOVMSKB128 + OpAMD64VPMOVMSKB256 + OpAMD64VMOVMSKPS128 + OpAMD64VMOVMSKPS256 + OpAMD64VMOVMSKPD128 + OpAMD64VMOVMSKPD256 OpAMD64Zero128 OpAMD64Zero256 OpAMD64Zero512 @@ -1693,18 +1699,6 @@ const ( OpAMD64VPCOMPRESSWMasked128 OpAMD64VPCOMPRESSWMasked256 OpAMD64VPCOMPRESSWMasked512 - OpAMD64VPDPBUSD128 - OpAMD64VPDPBUSD256 - OpAMD64VPDPBUSD512 - OpAMD64VPDPBUSDMasked128 - OpAMD64VPDPBUSDMasked256 - OpAMD64VPDPBUSDMasked512 - OpAMD64VPDPBUSDS128 - OpAMD64VPDPBUSDS256 - OpAMD64VPDPBUSDS512 - OpAMD64VPDPBUSDSMasked128 - OpAMD64VPDPBUSDSMasked256 - OpAMD64VPDPBUSDSMasked512 OpAMD64VPDPWSSD128 OpAMD64VPDPWSSD256 OpAMD64VPDPWSSD512 @@ -2021,12 +2015,24 @@ const ( OpAMD64VPMOVSXWQMasked128 OpAMD64VPMOVSXWQMasked256 OpAMD64VPMOVSXWQMasked512 + OpAMD64VPMOVUSDB128_128 + OpAMD64VPMOVUSDB128_256 + OpAMD64VPMOVUSDB128_512 + OpAMD64VPMOVUSDBMasked128_128 + OpAMD64VPMOVUSDBMasked128_256 + OpAMD64VPMOVUSDBMasked128_512 OpAMD64VPMOVUSDW128_128 OpAMD64VPMOVUSDW128_256 OpAMD64VPMOVUSDW256 OpAMD64VPMOVUSDWMasked128_128 OpAMD64VPMOVUSDWMasked128_256 OpAMD64VPMOVUSDWMasked256 + OpAMD64VPMOVUSQB128_128 + OpAMD64VPMOVUSQB128_256 + OpAMD64VPMOVUSQB128_512 + OpAMD64VPMOVUSQBMasked128_128 + OpAMD64VPMOVUSQBMasked128_256 + OpAMD64VPMOVUSQBMasked128_512 OpAMD64VPMOVUSQD128_128 OpAMD64VPMOVUSQD128_256 OpAMD64VPMOVUSQD256 @@ -2039,7 +2045,11 @@ const ( OpAMD64VPMOVUSQWMasked128_128 OpAMD64VPMOVUSQWMasked128_256 OpAMD64VPMOVUSQWMasked128_512 + OpAMD64VPMOVUSWB128_128 + OpAMD64VPMOVUSWB128_256 OpAMD64VPMOVUSWB256 + OpAMD64VPMOVUSWBMasked128_128 + OpAMD64VPMOVUSWBMasked128_256 OpAMD64VPMOVUSWBMasked256 OpAMD64VPMOVWB128_128 OpAMD64VPMOVWB128_256 @@ -2939,14 +2949,6 @@ const ( OpAMD64VPCMPEQQ512load OpAMD64VPCMPGTD512load OpAMD64VPCMPGTQ512load - OpAMD64VPDPBUSD512load - OpAMD64VPDPBUSDMasked128load - OpAMD64VPDPBUSDMasked256load - OpAMD64VPDPBUSDMasked512load - OpAMD64VPDPBUSDS512load - OpAMD64VPDPBUSDSMasked128load - OpAMD64VPDPBUSDSMasked256load - 
OpAMD64VPDPBUSDSMasked512load OpAMD64VPDPWSSD512load OpAMD64VPDPWSSDMasked128load OpAMD64VPDPWSSDMasked256load @@ -3623,15 +3625,23 @@ const ( OpAMD64VPMOVSXWQMasked128Merging OpAMD64VPMOVSXWQMasked256Merging OpAMD64VPMOVSXWQMasked512Merging + OpAMD64VPMOVUSDBMasked128_128Merging + OpAMD64VPMOVUSDBMasked128_256Merging + OpAMD64VPMOVUSDBMasked128_512Merging OpAMD64VPMOVUSDWMasked128_128Merging OpAMD64VPMOVUSDWMasked128_256Merging OpAMD64VPMOVUSDWMasked256Merging + OpAMD64VPMOVUSQBMasked128_128Merging + OpAMD64VPMOVUSQBMasked128_256Merging + OpAMD64VPMOVUSQBMasked128_512Merging OpAMD64VPMOVUSQDMasked128_128Merging OpAMD64VPMOVUSQDMasked128_256Merging OpAMD64VPMOVUSQDMasked256Merging OpAMD64VPMOVUSQWMasked128_128Merging OpAMD64VPMOVUSQWMasked128_256Merging OpAMD64VPMOVUSQWMasked128_512Merging + OpAMD64VPMOVUSWBMasked128_128Merging + OpAMD64VPMOVUSWBMasked128_256Merging OpAMD64VPMOVUSWBMasked256Merging OpAMD64VPMOVWBMasked128_128Merging OpAMD64VPMOVWBMasked128_256Merging @@ -6154,6 +6164,12 @@ const ( OpCvtMask64x4to8 OpCvtMask64x8to8 OpIsZeroVec + OpIsNaNFloat32x4 + OpIsNaNFloat32x8 + OpIsNaNFloat32x16 + OpIsNaNFloat64x2 + OpIsNaNFloat64x4 + OpIsNaNFloat64x8 OpAESDecryptLastRoundUint8x16 OpAESDecryptLastRoundUint8x32 OpAESDecryptLastRoundUint8x64 @@ -6198,19 +6214,19 @@ const ( OpAddInt64x4 OpAddInt64x8 OpAddPairsFloat32x4 - OpAddPairsFloat32x8 OpAddPairsFloat64x2 - OpAddPairsFloat64x4 + OpAddPairsGroupedFloat32x8 + OpAddPairsGroupedFloat64x4 + OpAddPairsGroupedInt16x16 + OpAddPairsGroupedInt32x8 + OpAddPairsGroupedUint16x16 + OpAddPairsGroupedUint32x8 OpAddPairsInt16x8 - OpAddPairsInt16x16 OpAddPairsInt32x4 - OpAddPairsInt32x8 + OpAddPairsSaturatedGroupedInt16x16 OpAddPairsSaturatedInt16x8 - OpAddPairsSaturatedInt16x16 OpAddPairsUint16x8 - OpAddPairsUint16x16 OpAddPairsUint32x4 - OpAddPairsUint32x8 OpAddSaturatedInt8x16 OpAddSaturatedInt8x32 OpAddSaturatedInt8x64 @@ -6454,12 +6470,6 @@ const ( OpDotProductPairsSaturatedUint8x16 OpDotProductPairsSaturatedUint8x32 
OpDotProductPairsSaturatedUint8x64 - OpDotProductQuadrupleInt32x4 - OpDotProductQuadrupleInt32x8 - OpDotProductQuadrupleInt32x16 - OpDotProductQuadrupleSaturatedInt32x4 - OpDotProductQuadrupleSaturatedInt32x8 - OpDotProductQuadrupleSaturatedInt32x16 OpEqualFloat32x4 OpEqualFloat32x8 OpEqualFloat32x16 @@ -6520,26 +6530,26 @@ const ( OpExpandUint64x2 OpExpandUint64x4 OpExpandUint64x8 - OpExtendLo2ToInt64x2Int8x16 - OpExtendLo2ToInt64x2Int16x8 - OpExtendLo2ToInt64x2Int32x4 - OpExtendLo2ToUint64x2Uint8x16 - OpExtendLo2ToUint64x2Uint16x8 - OpExtendLo2ToUint64x2Uint32x4 - OpExtendLo4ToInt32x4Int8x16 - OpExtendLo4ToInt32x4Int16x8 - OpExtendLo4ToInt64x4Int8x16 - OpExtendLo4ToInt64x4Int16x8 - OpExtendLo4ToUint32x4Uint8x16 - OpExtendLo4ToUint32x4Uint16x8 - OpExtendLo4ToUint64x4Uint8x16 - OpExtendLo4ToUint64x4Uint16x8 - OpExtendLo8ToInt16x8Int8x16 - OpExtendLo8ToInt32x8Int8x16 - OpExtendLo8ToInt64x8Int8x16 - OpExtendLo8ToUint16x8Uint8x16 - OpExtendLo8ToUint32x8Uint8x16 - OpExtendLo8ToUint64x8Uint8x16 + OpExtendLo2ToInt64Int8x16 + OpExtendLo2ToInt64Int16x8 + OpExtendLo2ToInt64Int32x4 + OpExtendLo2ToUint64Uint8x16 + OpExtendLo2ToUint64Uint16x8 + OpExtendLo2ToUint64Uint32x4 + OpExtendLo4ToInt32Int8x16 + OpExtendLo4ToInt32Int16x8 + OpExtendLo4ToInt64Int8x16 + OpExtendLo4ToInt64Int16x8 + OpExtendLo4ToUint32Uint8x16 + OpExtendLo4ToUint32Uint16x8 + OpExtendLo4ToUint64Uint8x16 + OpExtendLo4ToUint64Uint16x8 + OpExtendLo8ToInt16Int8x16 + OpExtendLo8ToInt32Int8x16 + OpExtendLo8ToInt64Int8x16 + OpExtendLo8ToUint16Uint8x16 + OpExtendLo8ToUint32Uint8x16 + OpExtendLo8ToUint64Uint8x16 OpExtendToInt16Int8x16 OpExtendToInt16Int8x32 OpExtendToInt32Int8x16 @@ -6675,12 +6685,6 @@ const ( OpInterleaveLoUint16x8 OpInterleaveLoUint32x4 OpInterleaveLoUint64x2 - OpIsNanFloat32x4 - OpIsNanFloat32x8 - OpIsNanFloat32x16 - OpIsNanFloat64x2 - OpIsNanFloat64x4 - OpIsNanFloat64x8 OpLeadingZerosInt32x4 OpLeadingZerosInt32x8 OpLeadingZerosInt32x16 @@ -6980,9 +6984,9 @@ const ( OpSaturateToInt8Int64x2 
OpSaturateToInt8Int64x4 OpSaturateToInt8Int64x8 + OpSaturateToInt16ConcatGroupedInt32x8 + OpSaturateToInt16ConcatGroupedInt32x16 OpSaturateToInt16ConcatInt32x4 - OpSaturateToInt16ConcatInt32x8 - OpSaturateToInt16ConcatInt32x16 OpSaturateToInt16Int32x4 OpSaturateToInt16Int32x8 OpSaturateToInt16Int32x16 @@ -6992,18 +6996,18 @@ const ( OpSaturateToInt32Int64x2 OpSaturateToInt32Int64x4 OpSaturateToInt32Int64x8 - OpSaturateToUint8Int16x8 - OpSaturateToUint8Int16x16 - OpSaturateToUint8Int32x4 - OpSaturateToUint8Int32x8 - OpSaturateToUint8Int32x16 - OpSaturateToUint8Int64x2 - OpSaturateToUint8Int64x4 - OpSaturateToUint8Int64x8 + OpSaturateToUint8Uint16x8 + OpSaturateToUint8Uint16x16 OpSaturateToUint8Uint16x32 - OpSaturateToUint16ConcatUint32x4 - OpSaturateToUint16ConcatUint32x8 - OpSaturateToUint16ConcatUint32x16 + OpSaturateToUint8Uint32x4 + OpSaturateToUint8Uint32x8 + OpSaturateToUint8Uint32x16 + OpSaturateToUint8Uint64x2 + OpSaturateToUint8Uint64x4 + OpSaturateToUint8Uint64x8 + OpSaturateToUint16ConcatGroupedInt32x8 + OpSaturateToUint16ConcatGroupedInt32x16 + OpSaturateToUint16ConcatInt32x4 OpSaturateToUint16Uint32x4 OpSaturateToUint16Uint32x8 OpSaturateToUint16Uint32x16 @@ -7192,19 +7196,19 @@ const ( OpSubInt64x4 OpSubInt64x8 OpSubPairsFloat32x4 - OpSubPairsFloat32x8 OpSubPairsFloat64x2 - OpSubPairsFloat64x4 + OpSubPairsGroupedFloat32x8 + OpSubPairsGroupedFloat64x4 + OpSubPairsGroupedInt16x16 + OpSubPairsGroupedInt32x8 + OpSubPairsGroupedUint16x16 + OpSubPairsGroupedUint32x8 OpSubPairsInt16x8 - OpSubPairsInt16x16 OpSubPairsInt32x4 - OpSubPairsInt32x8 + OpSubPairsSaturatedGroupedInt16x16 OpSubPairsSaturatedInt16x8 - OpSubPairsSaturatedInt16x16 OpSubPairsUint16x8 - OpSubPairsUint16x16 OpSubPairsUint32x4 - OpSubPairsUint32x8 OpSubSaturatedInt8x16 OpSubSaturatedInt8x32 OpSubSaturatedInt8x64 @@ -20354,6 +20358,84 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVMSKB128", + argLen: 1, + asm: x86.AVPMOVMSKB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "VPMOVMSKB256", + argLen: 1, + asm: x86.AVPMOVMSKB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "VMOVMSKPS128", + argLen: 1, + asm: x86.AVMOVMSKPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "VMOVMSKPS256", + argLen: 1, + asm: x86.AVMOVMSKPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "VMOVMSKPD128", + argLen: 1, + asm: x86.AVMOVMSKPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "VMOVMSKPD256", + argLen: 1, + asm: x86.AVMOVMSKPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { name: "Zero128", argLen: 0, zeroWidth: true, @@ -27270,204 +27352,6 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 
X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSD256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSD512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ 
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSDS256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPBUSDS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 
X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { name: "VPDPWSSD128", argLen: 3, resultInArg0: true, @@ -32104,6 +31988,87 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSDB128_128", + argLen: 1, + asm: 
x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_256", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_512", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 
X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSDW128_128", argLen: 1, asm: x86.AVPMOVUSDW, @@ -32185,6 +32150,87 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSQB128_128", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQB128_256", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQB128_512", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ 
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSQD128_128", argLen: 1, asm: x86.AVPMOVUSQD, @@ -32347,6 +32393,32 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSWB128_128", + argLen: 1, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 
X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWB128_256", + argLen: 1, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSWB256", argLen: 1, asm: x86.AVPMOVUSWB, @@ -32360,6 +32432,34 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSWBMasked256", argLen: 2, asm: x86.AVPMOVUSWB, @@ -45952,156 +46052,6 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSD512load", - auxType: auxSymOff, - argLen: 4, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: 
regInfo{ - inputs: []inputInfo{ - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked128load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked256load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDMasked512load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDS512load", - auxType: auxSymOff, - argLen: 4, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked128load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 
X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked256load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPDPBUSDSMasked512load", - auxType: auxSymOff, - argLen: 5, - resultInArg0: true, - symEffect: SymRead, - asm: x86.AVPDPBUSDS, - reg: regInfo{ - inputs: []inputInfo{ - {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - 
}, - }, - { name: "VPDPWSSD512load", auxType: auxSymOff, argLen: 4, @@ -57269,6 +57219,54 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSDBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 
X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSDWMasked128_128Merging", argLen: 3, resultInArg0: true, @@ -57317,6 +57315,54 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 
X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSQDMasked128_128Merging", argLen: 3, resultInArg0: true, @@ -57413,6 +57459,38 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "VPMOVUSWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { name: "VPMOVUSWBMasked256Merging", argLen: 3, resultInArg0: true, @@ -89000,6 +89078,36 @@ var opcodeTable = [...]opInfo{ generic: true, }, { + name: "IsNaNFloat32x4", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat32x8", + argLen: 1, + generic: true, + }, + { + name: 
"IsNaNFloat32x16", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x2", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x4", + argLen: 1, + generic: true, + }, + { + name: "IsNaNFloat64x8", + argLen: 1, + generic: true, + }, + { name: "AESDecryptLastRoundUint8x16", argLen: 2, generic: true, @@ -89238,67 +89346,67 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "AddPairsFloat32x8", + name: "AddPairsFloat64x2", argLen: 2, generic: true, }, { - name: "AddPairsFloat64x2", + name: "AddPairsGroupedFloat32x8", argLen: 2, generic: true, }, { - name: "AddPairsFloat64x4", + name: "AddPairsGroupedFloat64x4", argLen: 2, generic: true, }, { - name: "AddPairsInt16x8", + name: "AddPairsGroupedInt16x16", argLen: 2, generic: true, }, { - name: "AddPairsInt16x16", + name: "AddPairsGroupedInt32x8", argLen: 2, generic: true, }, { - name: "AddPairsInt32x4", + name: "AddPairsGroupedUint16x16", argLen: 2, generic: true, }, { - name: "AddPairsInt32x8", + name: "AddPairsGroupedUint32x8", argLen: 2, generic: true, }, { - name: "AddPairsSaturatedInt16x8", + name: "AddPairsInt16x8", argLen: 2, generic: true, }, { - name: "AddPairsSaturatedInt16x16", + name: "AddPairsInt32x4", argLen: 2, generic: true, }, { - name: "AddPairsUint16x8", + name: "AddPairsSaturatedGroupedInt16x16", argLen: 2, generic: true, }, { - name: "AddPairsUint16x16", + name: "AddPairsSaturatedInt16x8", argLen: 2, generic: true, }, { - name: "AddPairsUint32x4", + name: "AddPairsUint16x8", argLen: 2, generic: true, }, { - name: "AddPairsUint32x8", + name: "AddPairsUint32x4", argLen: 2, generic: true, }, @@ -90572,36 +90680,6 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "DotProductQuadrupleInt32x4", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleInt32x8", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleInt32x16", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleSaturatedInt32x4", - argLen: 3, - generic: true, - }, - 
{ - name: "DotProductQuadrupleSaturatedInt32x8", - argLen: 3, - generic: true, - }, - { - name: "DotProductQuadrupleSaturatedInt32x16", - argLen: 3, - generic: true, - }, - { name: "EqualFloat32x4", argLen: 2, commutative: true, @@ -90932,102 +91010,102 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ExtendLo2ToInt64x2Int8x16", + name: "ExtendLo2ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo2ToInt64x2Int16x8", + name: "ExtendLo2ToInt64Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo2ToInt64x2Int32x4", + name: "ExtendLo2ToInt64Int32x4", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint8x16", + name: "ExtendLo2ToUint64Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint16x8", + name: "ExtendLo2ToUint64Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint32x4", + name: "ExtendLo2ToUint64Uint32x4", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt32x4Int8x16", + name: "ExtendLo4ToInt32Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt32x4Int16x8", + name: "ExtendLo4ToInt32Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt64x4Int8x16", + name: "ExtendLo4ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt64x4Int16x8", + name: "ExtendLo4ToInt64Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint32x4Uint8x16", + name: "ExtendLo4ToUint32Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint32x4Uint16x8", + name: "ExtendLo4ToUint32Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint64x4Uint8x16", + name: "ExtendLo4ToUint64Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint64x4Uint16x8", + name: "ExtendLo4ToUint64Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo8ToInt16x8Int8x16", + name: "ExtendLo8ToInt16Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToInt32x8Int8x16", + name: "ExtendLo8ToInt32Int8x16", argLen: 1, generic: true, }, { - name: 
"ExtendLo8ToInt64x8Int8x16", + name: "ExtendLo8ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint16x8Uint8x16", + name: "ExtendLo8ToUint16Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint32x8Uint8x16", + name: "ExtendLo8ToUint32Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint64x8Uint8x16", + name: "ExtendLo8ToUint64Uint8x16", argLen: 1, generic: true, }, @@ -91707,42 +91785,6 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "IsNanFloat32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "IsNanFloat64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { name: "LeadingZerosInt32x4", argLen: 1, generic: true, @@ -93370,17 +93412,17 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SaturateToInt16ConcatInt32x4", + name: "SaturateToInt16ConcatGroupedInt32x8", argLen: 2, generic: true, }, { - name: "SaturateToInt16ConcatInt32x8", + name: "SaturateToInt16ConcatGroupedInt32x16", argLen: 2, generic: true, }, { - name: "SaturateToInt16ConcatInt32x16", + name: "SaturateToInt16ConcatInt32x4", argLen: 2, generic: true, }, @@ -93430,62 +93472,62 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SaturateToUint8Int16x8", + name: "SaturateToUint8Uint16x8", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int16x16", + name: "SaturateToUint8Uint16x16", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int32x4", + name: "SaturateToUint8Uint16x32", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int32x8", + name: "SaturateToUint8Uint32x4", argLen: 1, generic: true, }, { - name: 
"SaturateToUint8Int32x16", + name: "SaturateToUint8Uint32x8", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int64x2", + name: "SaturateToUint8Uint32x16", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int64x4", + name: "SaturateToUint8Uint64x2", argLen: 1, generic: true, }, { - name: "SaturateToUint8Int64x8", + name: "SaturateToUint8Uint64x4", argLen: 1, generic: true, }, { - name: "SaturateToUint8Uint16x32", + name: "SaturateToUint8Uint64x8", argLen: 1, generic: true, }, { - name: "SaturateToUint16ConcatUint32x4", + name: "SaturateToUint16ConcatGroupedInt32x8", argLen: 2, generic: true, }, { - name: "SaturateToUint16ConcatUint32x8", + name: "SaturateToUint16ConcatGroupedInt32x16", argLen: 2, generic: true, }, { - name: "SaturateToUint16ConcatUint32x16", + name: "SaturateToUint16ConcatInt32x4", argLen: 2, generic: true, }, @@ -94430,67 +94472,67 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SubPairsFloat32x8", + name: "SubPairsFloat64x2", argLen: 2, generic: true, }, { - name: "SubPairsFloat64x2", + name: "SubPairsGroupedFloat32x8", argLen: 2, generic: true, }, { - name: "SubPairsFloat64x4", + name: "SubPairsGroupedFloat64x4", argLen: 2, generic: true, }, { - name: "SubPairsInt16x8", + name: "SubPairsGroupedInt16x16", argLen: 2, generic: true, }, { - name: "SubPairsInt16x16", + name: "SubPairsGroupedInt32x8", argLen: 2, generic: true, }, { - name: "SubPairsInt32x4", + name: "SubPairsGroupedUint16x16", argLen: 2, generic: true, }, { - name: "SubPairsInt32x8", + name: "SubPairsGroupedUint32x8", argLen: 2, generic: true, }, { - name: "SubPairsSaturatedInt16x8", + name: "SubPairsInt16x8", argLen: 2, generic: true, }, { - name: "SubPairsSaturatedInt16x16", + name: "SubPairsInt32x4", argLen: 2, generic: true, }, { - name: "SubPairsUint16x8", + name: "SubPairsSaturatedGroupedInt16x16", argLen: 2, generic: true, }, { - name: "SubPairsUint16x16", + name: "SubPairsSaturatedInt16x8", argLen: 2, generic: true, }, { - name: "SubPairsUint32x4", 
+ name: "SubPairsUint16x8", argLen: 2, generic: true, }, { - name: "SubPairsUint32x8", + name: "SubPairsUint32x4", argLen: 2, generic: true, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 19f16e1cbb..e84bf19c83 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1006,10 +1006,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v) case OpAMD64VPACKUSDWMasked512: return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v) - case OpAMD64VPADDD128: - return rewriteValueAMD64_OpAMD64VPADDD128(v) - case OpAMD64VPADDD256: - return rewriteValueAMD64_OpAMD64VPADDD256(v) case OpAMD64VPADDD512: return rewriteValueAMD64_OpAMD64VPADDD512(v) case OpAMD64VPADDDMasked128: @@ -1126,22 +1122,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v) case OpAMD64VPCMPUQMasked512: return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v) - case OpAMD64VPDPBUSD512: - return rewriteValueAMD64_OpAMD64VPDPBUSD512(v) - case OpAMD64VPDPBUSDMasked128: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v) - case OpAMD64VPDPBUSDMasked256: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v) - case OpAMD64VPDPBUSDMasked512: - return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v) - case OpAMD64VPDPBUSDS512: - return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v) - case OpAMD64VPDPBUSDSMasked128: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v) - case OpAMD64VPDPBUSDSMasked256: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v) - case OpAMD64VPDPBUSDSMasked512: - return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v) case OpAMD64VPDPWSSD512: return rewriteValueAMD64_OpAMD64VPDPWSSD512(v) case OpAMD64VPDPWSSDMasked128: @@ -1402,6 +1382,10 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v) case OpAMD64VPOPCNTQMasked512: return 
rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v) + case OpAMD64VPOR128: + return rewriteValueAMD64_OpAMD64VPOR128(v) + case OpAMD64VPOR256: + return rewriteValueAMD64_OpAMD64VPOR256(v) case OpAMD64VPORD512: return rewriteValueAMD64_OpAMD64VPORD512(v) case OpAMD64VPORDMasked128: @@ -2133,45 +2117,45 @@ func rewriteValueAMD64(v *Value) bool { case OpAddPairsFloat32x4: v.Op = OpAMD64VHADDPS128 return true - case OpAddPairsFloat32x8: - v.Op = OpAMD64VHADDPS256 - return true case OpAddPairsFloat64x2: v.Op = OpAMD64VHADDPD128 return true - case OpAddPairsFloat64x4: + case OpAddPairsGroupedFloat32x8: + v.Op = OpAMD64VHADDPS256 + return true + case OpAddPairsGroupedFloat64x4: v.Op = OpAMD64VHADDPD256 return true - case OpAddPairsInt16x16: + case OpAddPairsGroupedInt16x16: + v.Op = OpAMD64VPHADDW256 + return true + case OpAddPairsGroupedInt32x8: + v.Op = OpAMD64VPHADDD256 + return true + case OpAddPairsGroupedUint16x16: v.Op = OpAMD64VPHADDW256 return true + case OpAddPairsGroupedUint32x8: + v.Op = OpAMD64VPHADDD256 + return true case OpAddPairsInt16x8: v.Op = OpAMD64VPHADDW128 return true case OpAddPairsInt32x4: v.Op = OpAMD64VPHADDD128 return true - case OpAddPairsInt32x8: - v.Op = OpAMD64VPHADDD256 - return true - case OpAddPairsSaturatedInt16x16: + case OpAddPairsSaturatedGroupedInt16x16: v.Op = OpAMD64VPHADDSW256 return true case OpAddPairsSaturatedInt16x8: v.Op = OpAMD64VPHADDSW128 return true - case OpAddPairsUint16x16: - v.Op = OpAMD64VPHADDW256 - return true case OpAddPairsUint16x8: v.Op = OpAMD64VPHADDW128 return true case OpAddPairsUint32x4: v.Op = OpAMD64VPHADDD128 return true - case OpAddPairsUint32x8: - v.Op = OpAMD64VPHADDD256 - return true case OpAddPtr: v.Op = OpAMD64ADDQ return true @@ -3066,19 +3050,25 @@ func rewriteValueAMD64(v *Value) bool { case OpCvtMask32x16to16: return rewriteValueAMD64_OpCvtMask32x16to16(v) case OpCvtMask32x4to8: - return rewriteValueAMD64_OpCvtMask32x4to8(v) + v.Op = OpAMD64VMOVMSKPS128 + return true case OpCvtMask32x8to8: - return 
rewriteValueAMD64_OpCvtMask32x8to8(v) + v.Op = OpAMD64VMOVMSKPS256 + return true case OpCvtMask64x2to8: - return rewriteValueAMD64_OpCvtMask64x2to8(v) + v.Op = OpAMD64VMOVMSKPD128 + return true case OpCvtMask64x4to8: - return rewriteValueAMD64_OpCvtMask64x4to8(v) + v.Op = OpAMD64VMOVMSKPD256 + return true case OpCvtMask64x8to8: return rewriteValueAMD64_OpCvtMask64x8to8(v) case OpCvtMask8x16to16: - return rewriteValueAMD64_OpCvtMask8x16to16(v) + v.Op = OpAMD64VPMOVMSKB128 + return true case OpCvtMask8x32to32: - return rewriteValueAMD64_OpCvtMask8x32to32(v) + v.Op = OpAMD64VPMOVMSKB256 + return true case OpCvtMask8x64to64: return rewriteValueAMD64_OpCvtMask8x64to64(v) case OpDiv128u: @@ -3142,24 +3132,6 @@ func rewriteValueAMD64(v *Value) bool { case OpDotProductPairsSaturatedUint8x64: v.Op = OpAMD64VPMADDUBSW512 return true - case OpDotProductQuadrupleInt32x16: - v.Op = OpAMD64VPDPBUSD512 - return true - case OpDotProductQuadrupleInt32x4: - v.Op = OpAMD64VPDPBUSD128 - return true - case OpDotProductQuadrupleInt32x8: - v.Op = OpAMD64VPDPBUSD256 - return true - case OpDotProductQuadrupleSaturatedInt32x16: - v.Op = OpAMD64VPDPBUSDS512 - return true - case OpDotProductQuadrupleSaturatedInt32x4: - v.Op = OpAMD64VPDPBUSDS128 - return true - case OpDotProductQuadrupleSaturatedInt32x8: - v.Op = OpAMD64VPDPBUSDS256 - return true case OpEq16: return rewriteValueAMD64_OpEq16(v) case OpEq32: @@ -3312,64 +3284,64 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpExpandUint8x32(v) case OpExpandUint8x64: return rewriteValueAMD64_OpExpandUint8x64(v) - case OpExtendLo2ToInt64x2Int16x8: + case OpExtendLo2ToInt64Int16x8: v.Op = OpAMD64VPMOVSXWQ128 return true - case OpExtendLo2ToInt64x2Int32x4: + case OpExtendLo2ToInt64Int32x4: v.Op = OpAMD64VPMOVSXDQ128 return true - case OpExtendLo2ToInt64x2Int8x16: + case OpExtendLo2ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ128 return true - case OpExtendLo2ToUint64x2Uint16x8: + case OpExtendLo2ToUint64Uint16x8: v.Op = 
OpAMD64VPMOVZXWQ128 return true - case OpExtendLo2ToUint64x2Uint32x4: + case OpExtendLo2ToUint64Uint32x4: v.Op = OpAMD64VPMOVZXDQ128 return true - case OpExtendLo2ToUint64x2Uint8x16: + case OpExtendLo2ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ128 return true - case OpExtendLo4ToInt32x4Int16x8: + case OpExtendLo4ToInt32Int16x8: v.Op = OpAMD64VPMOVSXWD128 return true - case OpExtendLo4ToInt32x4Int8x16: + case OpExtendLo4ToInt32Int8x16: v.Op = OpAMD64VPMOVSXBD128 return true - case OpExtendLo4ToInt64x4Int16x8: + case OpExtendLo4ToInt64Int16x8: v.Op = OpAMD64VPMOVSXWQ256 return true - case OpExtendLo4ToInt64x4Int8x16: + case OpExtendLo4ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ256 return true - case OpExtendLo4ToUint32x4Uint16x8: + case OpExtendLo4ToUint32Uint16x8: v.Op = OpAMD64VPMOVZXWD128 return true - case OpExtendLo4ToUint32x4Uint8x16: + case OpExtendLo4ToUint32Uint8x16: v.Op = OpAMD64VPMOVZXBD128 return true - case OpExtendLo4ToUint64x4Uint16x8: + case OpExtendLo4ToUint64Uint16x8: v.Op = OpAMD64VPMOVZXWQ256 return true - case OpExtendLo4ToUint64x4Uint8x16: + case OpExtendLo4ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ256 return true - case OpExtendLo8ToInt16x8Int8x16: + case OpExtendLo8ToInt16Int8x16: v.Op = OpAMD64VPMOVSXBW128 return true - case OpExtendLo8ToInt32x8Int8x16: + case OpExtendLo8ToInt32Int8x16: v.Op = OpAMD64VPMOVSXBD256 return true - case OpExtendLo8ToInt64x8Int8x16: + case OpExtendLo8ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ512 return true - case OpExtendLo8ToUint16x8Uint8x16: + case OpExtendLo8ToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW128 return true - case OpExtendLo8ToUint32x8Uint8x16: + case OpExtendLo8ToUint32Uint8x16: v.Op = OpAMD64VPMOVZXBD256 return true - case OpExtendLo8ToUint64x8Uint8x16: + case OpExtendLo8ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ512 return true case OpExtendToInt16Int8x16: @@ -3811,18 +3783,18 @@ func rewriteValueAMD64(v *Value) bool { return true case OpIsInBounds: return rewriteValueAMD64_OpIsInBounds(v) - case 
OpIsNanFloat32x16: - return rewriteValueAMD64_OpIsNanFloat32x16(v) - case OpIsNanFloat32x4: - return rewriteValueAMD64_OpIsNanFloat32x4(v) - case OpIsNanFloat32x8: - return rewriteValueAMD64_OpIsNanFloat32x8(v) - case OpIsNanFloat64x2: - return rewriteValueAMD64_OpIsNanFloat64x2(v) - case OpIsNanFloat64x4: - return rewriteValueAMD64_OpIsNanFloat64x4(v) - case OpIsNanFloat64x8: - return rewriteValueAMD64_OpIsNanFloat64x8(v) + case OpIsNaNFloat32x16: + return rewriteValueAMD64_OpIsNaNFloat32x16(v) + case OpIsNaNFloat32x4: + return rewriteValueAMD64_OpIsNaNFloat32x4(v) + case OpIsNaNFloat32x8: + return rewriteValueAMD64_OpIsNaNFloat32x8(v) + case OpIsNaNFloat64x2: + return rewriteValueAMD64_OpIsNaNFloat64x2(v) + case OpIsNaNFloat64x4: + return rewriteValueAMD64_OpIsNaNFloat64x4(v) + case OpIsNaNFloat64x8: + return rewriteValueAMD64_OpIsNaNFloat64x8(v) case OpIsNonNil: return rewriteValueAMD64_OpIsNonNil(v) case OpIsSliceInBounds: @@ -5040,15 +5012,15 @@ func rewriteValueAMD64(v *Value) bool { case OpSHA256TwoRoundsUint32x4: v.Op = OpAMD64SHA256RNDS2128 return true - case OpSaturateToInt16ConcatInt32x16: + case OpSaturateToInt16ConcatGroupedInt32x16: v.Op = OpAMD64VPACKSSDW512 return true + case OpSaturateToInt16ConcatGroupedInt32x8: + v.Op = OpAMD64VPACKSSDW256 + return true case OpSaturateToInt16ConcatInt32x4: v.Op = OpAMD64VPACKSSDW128 return true - case OpSaturateToInt16ConcatInt32x8: - v.Op = OpAMD64VPACKSSDW256 - return true case OpSaturateToInt16Int32x16: v.Op = OpAMD64VPMOVSDW256 return true @@ -5103,15 +5075,15 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturateToInt8Int64x8: v.Op = OpAMD64VPMOVSQB128_512 return true - case OpSaturateToUint16ConcatUint32x16: + case OpSaturateToUint16ConcatGroupedInt32x16: v.Op = OpAMD64VPACKUSDW512 return true - case OpSaturateToUint16ConcatUint32x4: - v.Op = OpAMD64VPACKUSDW128 - return true - case OpSaturateToUint16ConcatUint32x8: + case OpSaturateToUint16ConcatGroupedInt32x8: v.Op = OpAMD64VPACKUSDW256 return true + 
case OpSaturateToUint16ConcatInt32x4: + v.Op = OpAMD64VPACKUSDW128 + return true case OpSaturateToUint16Uint32x16: v.Op = OpAMD64VPMOVUSDW256 return true @@ -5139,32 +5111,32 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturateToUint32Uint64x8: v.Op = OpAMD64VPMOVUSQD256 return true - case OpSaturateToUint8Int16x16: - v.Op = OpAMD64VPMOVSWB128_256 + case OpSaturateToUint8Uint16x16: + v.Op = OpAMD64VPMOVUSWB128_256 return true - case OpSaturateToUint8Int16x8: - v.Op = OpAMD64VPMOVSWB128_128 + case OpSaturateToUint8Uint16x32: + v.Op = OpAMD64VPMOVUSWB256 return true - case OpSaturateToUint8Int32x16: - v.Op = OpAMD64VPMOVSDB128_512 + case OpSaturateToUint8Uint16x8: + v.Op = OpAMD64VPMOVUSWB128_128 return true - case OpSaturateToUint8Int32x4: - v.Op = OpAMD64VPMOVSDB128_128 + case OpSaturateToUint8Uint32x16: + v.Op = OpAMD64VPMOVUSDB128_512 return true - case OpSaturateToUint8Int32x8: - v.Op = OpAMD64VPMOVSDB128_256 + case OpSaturateToUint8Uint32x4: + v.Op = OpAMD64VPMOVUSDB128_128 return true - case OpSaturateToUint8Int64x2: - v.Op = OpAMD64VPMOVSQB128_128 + case OpSaturateToUint8Uint32x8: + v.Op = OpAMD64VPMOVUSDB128_256 return true - case OpSaturateToUint8Int64x4: - v.Op = OpAMD64VPMOVSQB128_256 + case OpSaturateToUint8Uint64x2: + v.Op = OpAMD64VPMOVUSQB128_128 return true - case OpSaturateToUint8Int64x8: - v.Op = OpAMD64VPMOVSQB128_512 + case OpSaturateToUint8Uint64x4: + v.Op = OpAMD64VPMOVUSQB128_256 return true - case OpSaturateToUint8Uint16x32: - v.Op = OpAMD64VPMOVUSWB256 + case OpSaturateToUint8Uint64x8: + v.Op = OpAMD64VPMOVUSQB128_512 return true case OpScaleFloat32x16: v.Op = OpAMD64VSCALEFPS512 @@ -5898,45 +5870,45 @@ func rewriteValueAMD64(v *Value) bool { case OpSubPairsFloat32x4: v.Op = OpAMD64VHSUBPS128 return true - case OpSubPairsFloat32x8: - v.Op = OpAMD64VHSUBPS256 - return true case OpSubPairsFloat64x2: v.Op = OpAMD64VHSUBPD128 return true - case OpSubPairsFloat64x4: + case OpSubPairsGroupedFloat32x8: + v.Op = OpAMD64VHSUBPS256 + return true 
+ case OpSubPairsGroupedFloat64x4: v.Op = OpAMD64VHSUBPD256 return true - case OpSubPairsInt16x16: + case OpSubPairsGroupedInt16x16: + v.Op = OpAMD64VPHSUBW256 + return true + case OpSubPairsGroupedInt32x8: + v.Op = OpAMD64VPHSUBD256 + return true + case OpSubPairsGroupedUint16x16: v.Op = OpAMD64VPHSUBW256 return true + case OpSubPairsGroupedUint32x8: + v.Op = OpAMD64VPHSUBD256 + return true case OpSubPairsInt16x8: v.Op = OpAMD64VPHSUBW128 return true case OpSubPairsInt32x4: v.Op = OpAMD64VPHSUBD128 return true - case OpSubPairsInt32x8: - v.Op = OpAMD64VPHSUBD256 - return true - case OpSubPairsSaturatedInt16x16: + case OpSubPairsSaturatedGroupedInt16x16: v.Op = OpAMD64VPHSUBSW256 return true case OpSubPairsSaturatedInt16x8: v.Op = OpAMD64VPHSUBSW128 return true - case OpSubPairsUint16x16: - v.Op = OpAMD64VPHSUBW256 - return true case OpSubPairsUint16x8: v.Op = OpAMD64VPHSUBW128 return true case OpSubPairsUint32x4: v.Op = OpAMD64VPHSUBD128 return true - case OpSubPairsUint32x8: - v.Op = OpAMD64VPHSUBD256 - return true case OpSubPtr: v.Op = OpAMD64SUBQ return true @@ -28763,7 +28735,7 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28779,7 +28751,7 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool { break } v.reset(OpAMD64VCMPPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -28792,7 +28764,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28809,7 +28781,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool { break } v.reset(OpAMD64VCMPPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -28822,7 +28794,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28839,7 +28811,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool { break } v.reset(OpAMD64VCMPPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -28852,7 +28824,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28869,7 +28841,7 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool { break } v.reset(OpAMD64VCMPPDMasked512load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -28881,7 +28853,7 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28897,7 +28869,7 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool { break } v.reset(OpAMD64VCMPPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -28910,7 +28882,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28927,7 +28899,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool { break } v.reset(OpAMD64VCMPPSMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -28940,7 +28912,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr 
mask mem) + // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28957,7 +28929,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool { break } v.reset(OpAMD64VCMPPSMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -28970,7 +28942,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -28987,7 +28959,7 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool { break } v.reset(OpAMD64VCMPPSMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32605,7 +32577,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32621,7 +32593,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQB128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = 
symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32633,7 +32605,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32649,7 +32621,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQB256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32661,7 +32633,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32677,7 +32649,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQB512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32690,7 +32662,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: 
(VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32707,7 +32679,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32720,7 +32692,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32737,7 +32709,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32750,7 +32722,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32767,7 +32739,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32779,7 +32751,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32795,7 +32767,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQB128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32807,7 +32779,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32823,7 +32795,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQB256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32835,7 +32807,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB512load {sym} 
[makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32851,7 +32823,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQB512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -32864,7 +32836,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32881,7 +32853,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQBMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32894,7 +32866,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32911,7 +32883,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQBMasked256load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -32924,7 +32896,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -32941,7 +32913,7 @@ func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool { break } v.reset(OpAMD64VGF2P8AFFINEQBMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -33775,6 +33747,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) + // result: (VPMOVUSWBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSWB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSWBMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask) // result: (VPSHLDWMasked128 [a] x y mask) for { @@ -34327,6 +34311,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) + // result: (VPMOVUSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) // result: 
(VPMOVUSWBMasked256 x mask) for { @@ -35294,34 +35290,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked128 (VPDPBUSD128 x y z) mask) - // result: (VPDPBUSDMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD128 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPDPBUSDS128 x y z) mask) - // result: (VPDPBUSDSMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSDS128 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked128) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) // result: (VPMOVSXDQMasked128 x mask) for { @@ -35607,6 +35575,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) + // result: (VPMOVUSDBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) // result: (VPACKUSDWMasked128 x y mask) for { @@ -36129,34 +36109,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked256 (VPDPBUSD256 x y z) mask) - // result: (VPDPBUSDMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD256 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPDPBUSDS256 x y z) mask) - // result: (VPDPBUSDSMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSDS256 { - break - } - z := 
v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked256) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) // result: (VPMOVSXDQMasked256 x mask) for { @@ -36480,6 +36432,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) + // result: (VPMOVUSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) // result: (VPACKUSDWMasked256 x y mask) for { @@ -37052,34 +37016,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) - // result: (VPDPBUSDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD512 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) - // result: (VPDPBUSDSMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSDS512 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked512) - v.AddArg4(x, y, z, mask) - return true - } // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) // result: (VPMOVSXDQMasked512 x mask) for { @@ -37416,6 +37352,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg3(x, y, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_512) + v.AddArg2(x, mask) + return true + 
} // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) // result: (VPACKUSDWMasked512 x y mask) for { @@ -38259,6 +38207,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) + // result: (VPMOVUSQBMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_128) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) // result: (VPMOVUSQWMasked128_128 x mask) for { @@ -39100,6 +39060,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) + // result: (VPMOVUSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) // result: (VPMOVUSQWMasked128_256 x mask) for { @@ -39920,6 +39892,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) // result: (VPMOVUSQWMasked128_512 x mask) for { @@ -42407,151 +42391,9 @@ func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDD128 (VPDPBUSD128 (Zero128 <t>) x y) z) - // result: (VPDPBUSD128 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - 
if v_0.Op != OpAMD64VPDPBUSD128 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero128 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD128) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD128 (VPDPBUSDS128 (Zero128 <t>) x y) z) - // result: (VPDPBUSDS128 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS128 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero128 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS128) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDD256 (VPDPBUSD256 (Zero256 <t>) x y) z) - // result: (VPDPBUSD256 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSD256 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero256 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD256) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD256 (VPDPBUSDS256 (Zero256 <t>) x y) z) - // result: (VPDPBUSDS256 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS256 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero256 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS256) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - return false -} func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPADDD512 (VPDPBUSD512 (Zero512 <t>) x y) z) - // result: (VPDPBUSD512 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = 
_i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSD512 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero512 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSD512) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } - // match: (VPADDD512 (VPDPBUSDS512 (Zero512 <t>) x y) z) - // result: (VPDPBUSDS512 <t> z x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPDPBUSDS512 { - continue - } - y := v_0.Args[2] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64Zero512 { - continue - } - t := v_0_0.Type - x := v_0.Args[1] - z := v_1 - v.reset(OpAMD64VPDPBUSDS512) - v.Type = t - v.AddArg3(z, x, y) - return true - } - break - } // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) // result: (VPADDD512load {sym} [off] x ptr mem) @@ -44109,6 +43951,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } + // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) // result: (VPMOVUSDWMasked256Merging dst x mask) for { @@ -44869,6 +44724,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } + // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) // result: 
(VPMOVUSQDMasked256Merging dst x mask) for { @@ -47797,6 +47665,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) @@ -47816,6 +47703,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) @@ -47854,6 +47760,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) @@ -50990,6 +50915,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) @@ -51009,6 +50953,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_256 { + break + } + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) @@ -51047,6 +51010,25 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB128_256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) @@ -52553,7 +52535,7 @@ func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52569,7 +52551,7 @@ func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool { break } v.reset(OpAMD64VPCMPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, 
mem) return true @@ -52582,7 +52564,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52599,7 +52581,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool { break } v.reset(OpAMD64VPCMPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52612,7 +52594,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52629,7 +52611,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool { break } v.reset(OpAMD64VPCMPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52642,7 +52624,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c 
:= auxIntToUint8(v.AuxInt) x := v_0 @@ -52659,7 +52641,7 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool { break } v.reset(OpAMD64VPCMPDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52785,7 +52767,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52801,7 +52783,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool { break } v.reset(OpAMD64VPCMPQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -52814,7 +52796,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52831,7 +52813,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool { break } v.reset(OpAMD64VPCMPQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52844,7 +52826,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool { v_0 := v.Args[0] // match: 
(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52861,7 +52843,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool { break } v.reset(OpAMD64VPCMPQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52874,7 +52856,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52891,7 +52873,7 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool { break } v.reset(OpAMD64VPCMPQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52903,7 +52885,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52919,7 +52901,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool { break } v.reset(OpAMD64VPCMPUD512load) - v.AuxInt 
= valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -52932,7 +52914,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52949,7 +52931,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool { break } v.reset(OpAMD64VPCMPUDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52962,7 +52944,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -52979,7 +52961,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool { break } v.reset(OpAMD64VPCMPUDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -52992,7 +52974,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -53009,7 +52991,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool { break } v.reset(OpAMD64VPCMPUDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -53021,7 +53003,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -53037,7 +53019,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool { break } v.reset(OpAMD64VPCMPUQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -53050,7 +53032,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -53067,7 +53049,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool { break } v.reset(OpAMD64VPCMPUQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), 
off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -53080,7 +53062,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -53097,7 +53079,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool { break } v.reset(OpAMD64VPCMPUQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -53110,7 +53092,7 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -53127,257 +53109,13 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { break } v.reset(OpAMD64VPCMPUQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSD512load {sym} [off] x y ptr mem) - for { - x := v_0 - y := v_1 - l := v_2 - if 
l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - 
// match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDS512load {sym} [off] x y ptr mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, 
ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem) - for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPDPBUSDSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -57040,9 +56778,173 @@ func rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPOR128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOR128 (VCMPPS128 [3] x x) (VCMPPS128 [3] y y)) + // result: (VCMPPS128 [3] x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, 
v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break + } + // match: (VPOR128 (VCMPPD128 [3] x x) (VCMPPD128 [3] y y)) + // result: (VCMPPD128 [3] x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPOR256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOR256 (VCMPPS256 [3] x x) (VCMPPS256 [3] y y)) + // result: (VCMPPS256 [3] x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break + } + // match: (VPOR256 (VCMPPD256 [3] x x) (VCMPPD256 [3] y y)) + // result: (VCMPPD256 [3] x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + 
v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break + } + return false +} func rewriteValueAMD64_OpAMD64VPORD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y))) + // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0_0.AuxInt) != 3 { + continue + } + x := v_0_0.Args[1] + if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1_0.AuxInt) != 3 { + continue + } + y := v_1_0.Args[1] + if y != v_1_0.Args[0] { + continue + } + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_0_0.AuxInt) != 3 { + continue + } + x := v_0_0.Args[1] + if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_1_0.AuxInt) != 3 { + continue + } + y := v_1_0.Args[1] + if y != v_1_0.Args[0] { + continue + } + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } // match: (VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) 
// cond: canMergeLoad(v, l) && clobber(l) // result: (VPORD512load {sym} [off] x ptr mem) @@ -57296,7 +57198,7 @@ func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool { v_0 := v.Args[0] // match: (VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57311,7 +57213,7 @@ func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool { break } v.reset(OpAMD64VPROLD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57322,7 +57224,7 @@ func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool { v_0 := v.Args[0] // match: (VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57337,7 +57239,7 @@ func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool { break } v.reset(OpAMD64VPROLD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57348,7 +57250,7 @@ func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool { v_0 := v.Args[0] // match: (VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57363,7 +57265,7 @@ func rewriteValueAMD64_OpAMD64VPROLD512(v 
*Value) bool { break } v.reset(OpAMD64VPROLD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57375,7 +57277,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57391,7 +57293,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool { break } v.reset(OpAMD64VPROLDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57403,7 +57305,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57419,7 +57321,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool { break } v.reset(OpAMD64VPROLDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57431,7 +57333,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57447,7 +57349,7 @@ func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool { break } v.reset(OpAMD64VPROLDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57458,7 +57360,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57473,7 +57375,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool { break } v.reset(OpAMD64VPROLQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57484,7 +57386,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57499,7 +57401,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool { break } v.reset(OpAMD64VPROLQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ 
-57510,7 +57412,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57525,7 +57427,7 @@ func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool { break } v.reset(OpAMD64VPROLQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57537,7 +57439,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57553,7 +57455,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool { break } v.reset(OpAMD64VPROLQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57565,7 +57467,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57581,7 +57483,7 @@ func 
rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool { break } v.reset(OpAMD64VPROLQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57593,7 +57495,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57609,7 +57511,7 @@ func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool { break } v.reset(OpAMD64VPROLQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -57956,7 +57858,7 @@ func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool { v_0 := v.Args[0] // match: (VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57971,7 +57873,7 @@ func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool { break } v.reset(OpAMD64VPRORD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -57982,7 +57884,7 @@ func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool { v_0 := v.Args[0] // match: (VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // 
result: (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -57997,7 +57899,7 @@ func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool { break } v.reset(OpAMD64VPRORD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -58008,7 +57910,7 @@ func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool { v_0 := v.Args[0] // match: (VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58023,7 +57925,7 @@ func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool { break } v.reset(OpAMD64VPRORD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -58035,7 +57937,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58051,7 +57953,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool { break } v.reset(OpAMD64VPRORDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ 
-58063,7 +57965,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58079,7 +57981,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool { break } v.reset(OpAMD64VPRORDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -58091,7 +57993,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58107,7 +58009,7 @@ func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool { break } v.reset(OpAMD64VPRORDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -58118,7 +58020,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58133,7 +58035,7 @@ func 
rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool { break } v.reset(OpAMD64VPRORQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -58144,7 +58046,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58159,7 +58061,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool { break } v.reset(OpAMD64VPRORQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -58170,7 +58072,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58185,7 +58087,7 @@ func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool { break } v.reset(OpAMD64VPRORQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -58197,7 +58099,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked128load {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58213,7 +58115,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool { break } v.reset(OpAMD64VPRORQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -58225,7 +58127,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58241,7 +58143,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool { break } v.reset(OpAMD64VPRORQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -58253,7 +58155,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -58269,7 +58171,7 @@ func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool { break } v.reset(OpAMD64VPRORQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) 
v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -58617,7 +58519,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58633,7 +58535,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool { break } v.reset(OpAMD64VPSHLDD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58645,7 +58547,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58661,7 +58563,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool { break } v.reset(OpAMD64VPSHLDD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58673,7 +58575,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58689,7 +58591,7 @@ func 
rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool { break } v.reset(OpAMD64VPSHLDD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58702,7 +58604,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58719,7 +58621,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool { break } v.reset(OpAMD64VPSHLDDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -58732,7 +58634,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58749,7 +58651,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool { break } v.reset(OpAMD64VPSHLDDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -58762,7 +58664,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDDMasked512 [c] x 
l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58779,7 +58681,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool { break } v.reset(OpAMD64VPSHLDDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -58791,7 +58693,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58807,7 +58709,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool { break } v.reset(OpAMD64VPSHLDQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58819,7 +58721,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58835,7 +58737,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool { break } v.reset(OpAMD64VPSHLDQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58847,7 +58749,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58863,7 +58765,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool { break } v.reset(OpAMD64VPSHLDQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -58876,7 +58778,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58893,7 +58795,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool { break } v.reset(OpAMD64VPSHLDQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -58906,7 +58808,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDQMasked256load {sym} 
[makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58923,7 +58825,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool { break } v.reset(OpAMD64VPSHLDQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -58936,7 +58838,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -58953,7 +58855,7 @@ func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool { break } v.reset(OpAMD64VPSHLDQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59325,7 +59227,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59341,7 +59243,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool { break } v.reset(OpAMD64VPSHRDD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -59353,7 +59255,7 @@ func 
rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59369,7 +59271,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool { break } v.reset(OpAMD64VPSHRDD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -59381,7 +59283,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59397,7 +59299,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool { break } v.reset(OpAMD64VPSHRDD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -59410,7 +59312,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59427,7 +59329,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool { break } 
v.reset(OpAMD64VPSHRDDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59440,7 +59342,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59457,7 +59359,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool { break } v.reset(OpAMD64VPSHRDDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59470,7 +59372,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59487,7 +59389,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool { break } v.reset(OpAMD64VPSHRDDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59499,7 +59401,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59515,7 +59417,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool { break } v.reset(OpAMD64VPSHRDQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -59527,7 +59429,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59543,7 +59445,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool { break } v.reset(OpAMD64VPSHRDQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -59555,7 +59457,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59571,7 +59473,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool { break } v.reset(OpAMD64VPSHRDQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, 
mem) return true @@ -59584,7 +59486,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59601,7 +59503,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool { break } v.reset(OpAMD64VPSHRDQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59614,7 +59516,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59631,7 +59533,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool { break } v.reset(OpAMD64VPSHRDQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -59644,7 +59546,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr 
mask mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -59661,7 +59563,7 @@ func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool { break } v.reset(OpAMD64VPSHRDQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) return true @@ -60032,7 +59934,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60047,7 +59949,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool { break } v.reset(OpAMD64VPSHUFD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -60059,7 +59961,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60075,7 +59977,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool { break } v.reset(OpAMD64VPSHUFDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60087,7 +59989,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool { v_0 := v.Args[0] // 
match: (VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60103,7 +60005,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool { break } v.reset(OpAMD64VPSHUFDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60115,7 +60017,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60131,7 +60033,7 @@ func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool { break } v.reset(OpAMD64VPSHUFDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60196,7 +60098,7 @@ func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60211,7 +60113,7 @@ func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool { break } 
v.reset(OpAMD64VPSLLD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -60243,7 +60145,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60259,7 +60161,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSLLDMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60291,7 +60193,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60307,7 +60209,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSLLDMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60339,7 +60241,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} 
[off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60355,7 +60257,7 @@ func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSLLDMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60420,7 +60322,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60435,7 +60337,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool { break } v.reset(OpAMD64VPSLLQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -60467,7 +60369,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60483,7 +60385,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSLLQMasked128constload) - v.AuxInt 
= valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60515,7 +60417,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60531,7 +60433,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSLLQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60563,7 +60465,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -60579,7 +60481,7 @@ func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSLLQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -60986,7 +60888,7 @@ func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61001,7 +60903,7 @@ func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool { break } v.reset(OpAMD64VPSRAD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61033,7 +60935,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61049,7 +60951,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSRADMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61081,7 +60983,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61097,7 +60999,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSRADMasked256constload) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61129,7 +61031,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61145,7 +61047,7 @@ func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSRADMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61174,7 +61076,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61189,7 +61091,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool { break } v.reset(OpAMD64VPSRAQ128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61218,7 +61120,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ256constload {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61233,7 +61135,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool { break } v.reset(OpAMD64VPSRAQ256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61262,7 +61164,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61277,7 +61179,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool { break } v.reset(OpAMD64VPSRAQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61309,7 +61211,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61325,7 +61227,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSRAQMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = 
symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61357,7 +61259,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61373,7 +61275,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSRAQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61405,7 +61307,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61421,7 +61323,7 @@ func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSRAQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61828,7 +61730,7 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRLD512constload {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61843,7 +61745,7 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { break } v.reset(OpAMD64VPSRLD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61855,7 +61757,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61871,7 +61773,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSRLDMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61883,7 +61785,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61899,7 +61801,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSRLDMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, 
mem) return true @@ -61911,7 +61813,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61927,7 +61829,7 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSRLDMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61938,7 +61840,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -61953,7 +61855,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { break } v.reset(OpAMD64VPSRLQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -61965,7 +61867,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := 
auxIntToUint8(v.AuxInt) l := v_0 @@ -61981,7 +61883,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { break } v.reset(OpAMD64VPSRLQMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -61993,7 +61895,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -62009,7 +61911,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { break } v.reset(OpAMD64VPSRLQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -62021,7 +61923,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -62037,7 +61939,7 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { break } v.reset(OpAMD64VPSRLQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -62506,7 +62408,7 @@ 
func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62523,7 +62425,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool { break } v.reset(OpAMD64VPTERNLOGD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -62536,7 +62438,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62553,7 +62455,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool { break } v.reset(OpAMD64VPTERNLOGD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -62566,7 +62468,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62583,7 +62485,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v 
*Value) bool { break } v.reset(OpAMD64VPTERNLOGD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -62596,7 +62498,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62613,7 +62515,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool { break } v.reset(OpAMD64VPTERNLOGQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -62626,7 +62528,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62643,7 +62545,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool { break } v.reset(OpAMD64VPTERNLOGQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -62656,7 +62558,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool { v_0 := v.Args[0] // match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem) + // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -62673,7 +62575,7 @@ func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool { break } v.reset(OpAMD64VPTERNLOGQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg4(x, y, ptr, mem) return true @@ -63306,7 +63208,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63321,7 +63223,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool { break } v.reset(OpAMD64VREDUCEPD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63332,7 +63234,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63347,7 +63249,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool { break } v.reset(OpAMD64VREDUCEPD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ 
-63358,7 +63260,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63373,7 +63275,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool { break } v.reset(OpAMD64VREDUCEPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63385,7 +63287,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63401,7 +63303,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool { break } v.reset(OpAMD64VREDUCEPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63413,7 +63315,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63429,7 
+63331,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool { break } v.reset(OpAMD64VREDUCEPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63441,7 +63343,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63457,7 +63359,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { break } v.reset(OpAMD64VREDUCEPDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63468,7 +63370,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63483,7 +63385,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { break } v.reset(OpAMD64VREDUCEPS128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63494,7 +63396,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} 
[off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63509,7 +63411,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { break } v.reset(OpAMD64VREDUCEPS256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63520,7 +63422,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63535,7 +63437,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { break } v.reset(OpAMD64VREDUCEPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63547,7 +63449,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63563,7 +63465,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool { break } v.reset(OpAMD64VREDUCEPSMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63575,7 +63477,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63591,7 +63493,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool { break } v.reset(OpAMD64VREDUCEPSMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63603,7 +63505,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63619,7 +63521,7 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { break } v.reset(OpAMD64VREDUCEPSMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63630,7 +63532,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + 
// result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63645,7 +63547,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63656,7 +63558,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63671,7 +63573,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63682,7 +63584,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63697,7 +63599,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63709,7 +63611,7 @@ func 
rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63725,7 +63627,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63737,7 +63639,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63753,7 +63655,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63765,7 +63667,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) 
for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63781,7 +63683,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63792,7 +63694,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63807,7 +63709,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPS128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63818,7 +63720,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63833,7 +63735,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPS256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63844,7 +63746,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool { v_0 := v.Args[0] // match: 
(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63859,7 +63761,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true @@ -63871,7 +63773,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63887,7 +63789,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPSMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63899,7 +63801,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63915,7 +63817,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool { 
break } v.reset(OpAMD64VRNDSCALEPSMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -63927,7 +63829,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { v_0 := v.Args[0] // match: (VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { c := auxIntToUint8(v.AuxInt) l := v_0 @@ -63943,7 +63845,7 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { break } v.reset(OpAMD64VRNDSCALEPSMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true @@ -64553,7 +64455,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool { v_0 := v.Args[0] // match: (VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSHUFPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -64569,7 +64471,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool { break } v.reset(OpAMD64VSHUFPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -64581,7 +64483,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool { v_0 := v.Args[0] // match: (VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VSHUFPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + // result: (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) x := v_0 @@ -64597,7 +64499,7 @@ func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool { break } v.reset(OpAMD64VSHUFPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) return true @@ -68826,13 +68728,11 @@ func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool { func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask16x16to16 <t> x) - // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x)) + // match: (CvtMask16x16to16 x) + // result: (KMOVWi (VPMOVVec16x16ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVWi) - v.Type = t v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) @@ -68842,13 +68742,11 @@ func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool { func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask16x32to32 <t> x) - // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x)) + // match: (CvtMask16x32to32 x) + // result: (KMOVDi (VPMOVVec16x32ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVDi) - v.Type = t v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) @@ -68858,13 +68756,11 @@ func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool { func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask16x8to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x)) + // match: (CvtMask16x8to8 x) + // result: (KMOVBi (VPMOVVec16x8ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVBi) - v.Type = t v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) @@ -68874,141 +68770,39 @@ func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool { func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask32x16to16 <t> x) - // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x)) + // match: (CvtMask32x16to16 x) + // result: (KMOVWi (VPMOVVec32x16ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVWi) - v.Type = t v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask32x4to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - v.reset(OpAMD64KMOVBi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask32x8to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - v.reset(OpAMD64KMOVBi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask64x2to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - v.reset(OpAMD64KMOVBi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask64x4to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - 
v.reset(OpAMD64KMOVBi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask64x8to8 <t> x) - // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x)) + // match: (CvtMask64x8to8 x) + // result: (KMOVBi (VPMOVVec64x8ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVBi) - v.Type = t v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask8x16to16 <t> x) - // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - v.reset(OpAMD64KMOVWi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CvtMask8x32to32 <t> x) - // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x)) - for { - t := v.Type - x := v_0 - v.reset(OpAMD64KMOVDi) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (CvtMask8x64to64 <t> x) - // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x)) + // match: (CvtMask8x64to64 x) + // result: (KMOVQi (VPMOVVec8x64ToM <types.TypeMask> x)) for { - t := v.Type x := v_0 v.reset(OpAMD64KMOVQi) - v.Type = t v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) @@ -71229,94 +71023,82 @@ func rewriteValueAMD64_OpIsInBounds(v *Value) bool { return true } } -func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool { - v_1 := v.Args[1] +func 
rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (IsNanFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) + // match: (IsNaNFloat32x16 x) + // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) for { x := v_0 - y := v_1 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) + v0.AddArg2(x, x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x4 x y) - // result: (VCMPPS128 [3] x y) + // match: (IsNaNFloat32x4 x) + // result: (VCMPPS128 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPS128) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x8 x y) - // result: (VCMPPS256 [3] x y) + // match: (IsNaNFloat32x8 x) + // result: (VCMPPS256 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPS256) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x2 x y) - // result: (VCMPPD128 [3] x y) + // match: (IsNaNFloat64x2 x) + // result: (VCMPPD128 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPD128) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x4 x y) - // result: (VCMPPD256 [3] x y) + // match: (IsNaNFloat64x4 x) + // result: 
(VCMPPD256 [3] x x) for { x := v_0 - y := v_1 v.reset(OpAMD64VCMPPD256) v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) + v.AddArg2(x, x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (IsNanFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + // match: (IsNaNFloat64x8 x) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) for { x := v_0 - y := v_1 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) + v0.AddArg2(x, x) v.AddArg(v0) return true } diff --git a/src/cmd/compile/internal/ssa/sccp.go b/src/cmd/compile/internal/ssa/sccp.go index 9b958d0454..7ef8d6b7c1 100644 --- a/src/cmd/compile/internal/ssa/sccp.go +++ b/src/cmd/compile/internal/ssa/sccp.go @@ -507,6 +507,10 @@ func (t *worklist) propagate(block *Block) { branchIdx = 1 - condLattice.val.AuxInt } else { branchIdx = condLattice.val.AuxInt + if branchIdx < 0 || branchIdx >= int64(len(block.Succs)) { + // unreachable code, do nothing then + break + } } t.edges = append(t.edges, block.Succs[branchIdx]) } else { diff --git a/src/cmd/compile/internal/ssa/tern_helpers.go b/src/cmd/compile/internal/ssa/tern_helpers.go index 3ffc980c33..923a9f505e 100644 --- a/src/cmd/compile/internal/ssa/tern_helpers.go +++ b/src/cmd/compile/internal/ssa/tern_helpers.go @@ -1,4 +1,4 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. 
package ssa diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index 4425c5617b..e2eebd783d 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1667,6 +1667,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64) + addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64) // sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go. sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) { diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 7eb5456994..4ad0c6032c 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
package ssagen @@ -69,19 +69,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.AddPairs", opLen2(ssa.OpAddPairsFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.AddPairs", opLen2(ssa.OpAddPairsInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AddPairs", opLen2(ssa.OpAddPairsInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x4.AddPairs", opLen2(ssa.OpAddPairsInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.AddPairs", opLen2(ssa.OpAddPairsInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x8.AddPairs", opLen2(ssa.OpAddPairsUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.AddPairs", opLen2(ssa.OpAddPairsUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.AddPairs", opLen2(ssa.OpAddPairsUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.AddPairs", opLen2(ssa.OpAddPairsUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, 
"Uint16x16.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x8.AddPairsGrouped", opLen2(ssa.OpAddPairsGroupedUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.AddPairsSaturated", opLen2(ssa.OpAddPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.AddPairsSaturatedGrouped", opLen2(ssa.OpAddPairsSaturatedGroupedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.AddSaturated", opLen2(ssa.OpAddSaturatedInt8x64, types.TypeVec512), sys.AMD64) @@ -328,12 +328,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.DotProductQuadruple", opLen3_31Zero3(ssa.OpDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.DotProductQuadrupleSaturated", opLen3_31Zero3(ssa.OpDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64) @@ -394,26 +388,26 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint16x8, types.TypeVec256), sys.AMD64) - 
addF(simdPackage, "Int8x16.ExtendLo8ToInt16x8", opLen1(ssa.OpExtendLo8ToInt16x8Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo8ToInt32x8", opLen1(ssa.OpExtendLo8ToInt32x8Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo8ToInt64x8", opLen1(ssa.OpExtendLo8ToInt64x8Int8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint16x8", opLen1(ssa.OpExtendLo8ToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint32x8", opLen1(ssa.OpExtendLo8ToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint64x8", opLen1(ssa.OpExtendLo8ToUint64x8Uint8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint8x16, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt16", opLen1(ssa.OpExtendLo8ToInt16Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt32", opLen1(ssa.OpExtendLo8ToInt32Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt64", opLen1(ssa.OpExtendLo8ToInt64Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint16", opLen1(ssa.OpExtendLo8ToUint16Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint32", opLen1(ssa.OpExtendLo8ToUint32Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint64", opLen1(ssa.OpExtendLo8ToUint64Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int8x16, types.TypeVec512), sys.AMD64) @@ -577,12 +571,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64) @@ -926,29 +914,29 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x4.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x8.SaturateToInt16", opLen1(ssa.OpSaturateToInt16Int64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x4.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.SaturateToInt16Concat", opLen2(ssa.OpSaturateToInt16ConcatInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x8.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToInt16ConcatGrouped", opLen2(ssa.OpSaturateToInt16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x8.SaturateToInt32", opLen1(ssa.OpSaturateToInt32Int64x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int16x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int32x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x4, 
types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Int64x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x32.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint32x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x2.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.SaturateToUint8", opLen1(ssa.OpSaturateToUint8Uint64x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x16.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint32x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x2.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x8.SaturateToUint16", opLen1(ssa.OpSaturateToUint16Uint64x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x4, 
types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.SaturateToUint16Concat", opLen2(ssa.OpSaturateToUint16ConcatInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SaturateToUint16ConcatGrouped", opLen2(ssa.OpSaturateToUint16ConcatGroupedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x8.SaturateToUint32", opLen1(ssa.OpSaturateToUint32Uint64x8, types.TypeVec256), sys.AMD64) @@ -1199,19 +1187,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.SubPairs", opLen2(ssa.OpSubPairsFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.SubPairs", opLen2(ssa.OpSubPairsInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubPairs", opLen2(ssa.OpSubPairsInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x4.SubPairs", opLen2(ssa.OpSubPairsInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.SubPairs", opLen2(ssa.OpSubPairsInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x8.SubPairs", opLen2(ssa.OpSubPairsUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.SubPairs", opLen2(ssa.OpSubPairsUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x4.SubPairs", opLen2(ssa.OpSubPairsUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.SubPairs", opLen2(ssa.OpSubPairsUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x16.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, 
"Uint32x8.SubPairsGrouped", opLen2(ssa.OpSubPairsGroupedUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.SubPairsSaturated", opLen2(ssa.OpSubPairsSaturatedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.SubPairsSaturatedGrouped", opLen2(ssa.OpSubPairsSaturatedGroupedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.SubSaturated", opLen2(ssa.OpSubSaturatedInt8x64, types.TypeVec512), sys.AMD64) diff --git a/src/cmd/compile/testdata/script/issue77033.txt b/src/cmd/compile/testdata/script/issue77033.txt new file mode 100644 index 0000000000..3b977e5440 --- /dev/null +++ b/src/cmd/compile/testdata/script/issue77033.txt @@ -0,0 +1,40 @@ +go test -bench=Foo -cpuprofile=default.pgo +go test -bench=Foo -pgo=default.pgo +! 
stdout 'FAIL' + +-- main_test.go -- +package main + +import ( + "testing" +) + +var a int + +func save(x int) { + a = x +} + +func foo() { + for i := range yield1 { + defer save(i) + } +} + +func yield1(yield func(int) bool) { + yield(1) +} + +func BenchmarkFoo(b *testing.B) { + for i := 0; i < b.N; i++ { + foo() + } + if a != 1 { + b.Fatalf("a = %d; want 1", a) + } +} + +-- go.mod -- +module demo + +go 1.24 diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go index 6d3742525c..48c3aa5efd 100644 --- a/src/cmd/dist/test.go +++ b/src/cmd/dist/test.go @@ -748,7 +748,7 @@ func (t *tester) registerTests() { if !strings.Contains(goexperiment, "jsonv2") { t.registerTest("GOEXPERIMENT=jsonv2 go test encoding/json/...", &goTest{ variant: "jsonv2", - env: []string{"GOEXPERIMENT=jsonv2"}, + env: []string{"GOEXPERIMENT=" + goexperiments("jsonv2")}, pkg: "encoding/json/...", }) } @@ -757,7 +757,7 @@ func (t *tester) registerTests() { if !strings.Contains(goexperiment, "runtimesecret") { t.registerTest("GOEXPERIMENT=runtimesecret go test runtime/secret/...", &goTest{ variant: "runtimesecret", - env: []string{"GOEXPERIMENT=runtimesecret"}, + env: []string{"GOEXPERIMENT=" + goexperiments("runtimesecret")}, pkg: "runtime/secret/...", }) } @@ -766,7 +766,7 @@ func (t *tester) registerTests() { if goarch == "amd64" && !strings.Contains(goexperiment, "simd") { t.registerTest("GOEXPERIMENT=simd go test simd/archsimd/...", &goTest{ variant: "simd", - env: []string{"GOEXPERIMENT=simd"}, + env: []string{"GOEXPERIMENT=" + goexperiments("simd")}, pkg: "simd/archsimd/...", }) } @@ -1888,3 +1888,19 @@ func fipsVersions(short bool) []string { } return versions } + +// goexperiments returns the GOEXPERIMENT value to use +// when running a test with the given experiments enabled. +// +// It preserves any existing GOEXPERIMENTs. 
+func goexperiments(exps ...string) string { + if len(exps) == 0 { + return goexperiment + } + existing := goexperiment + if existing != "" { + existing += "," + } + return existing + strings.Join(exps, ",") + +} diff --git a/src/cmd/go.mod b/src/cmd/go.mod index c7d3cc6136..85e8c4cb5f 100644 --- a/src/cmd/go.mod +++ b/src/cmd/go.mod @@ -11,7 +11,7 @@ require ( golang.org/x/sys v0.39.0 golang.org/x/telemetry v0.0.0-20251128220624-abf20d0e57ec golang.org/x/term v0.38.0 - golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2 + golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c ) require ( diff --git a/src/cmd/go.sum b/src/cmd/go.sum index b02c469a41..61c88e5253 100644 --- a/src/cmd/go.sum +++ b/src/cmd/go.sum @@ -22,7 +22,7 @@ golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= -golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2 h1:2Qqv605Nus9iUp3ErvEU/q92Q3HAzeROztzl9pzAno8= -golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= +golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c h1:0pZej6BQOooNbOfjJEu4v5qx9hdwFX8HnvHCcNXcs2w= +golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef h1:mqLYrXCXYEZOop9/Dbo6RPX11539nwiCNBb1icVPmw8= rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef/go.mod h1:8xcPgWmwlZONN1D9bjxtHEjrUtSEa3fakVF8iaewYKQ= diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go index fe9b862073..8c346dafdb 100644 --- a/src/cmd/go/alldocs.go +++ b/src/cmd/go/alldocs.go @@ -1954,7 +1954,7 @@ // // -o file // Save a copy of the test binary to the named file. -// The test still runs (unless -c or -i is specified). 
+// The test still runs (unless -c is specified). // If file ends in a slash or names an existing directory, // the test is written to pkg.test in that directory. // diff --git a/src/cmd/go/internal/doc/pkgsite.go b/src/cmd/go/internal/doc/pkgsite.go index c173167b63..dc344cbbca 100644 --- a/src/cmd/go/internal/doc/pkgsite.go +++ b/src/cmd/go/internal/doc/pkgsite.go @@ -71,7 +71,7 @@ func doPkgsite(urlPath, fragment string) error { env = append(env, "GOPROXY="+gomodcache+","+goproxy) } - const version = "v0.0.0-20250714212547-01b046e81fe7" + const version = "v0.0.0-20251223195805-1a3bd3c788fe" cmd := exec.Command(goCmd(), "run", "golang.org/x/pkgsite/cmd/internal/doc@"+version, "-gorepo", buildCtx.GOROOT, "-http", addr, diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go index af2c0abe04..beded695bf 100644 --- a/src/cmd/go/internal/modindex/scan.go +++ b/src/cmd/go/internal/modindex/scan.go @@ -112,10 +112,10 @@ func parseErrorToString(err error) string { return "" } var p parseError - if e, ok := err.(scanner.ErrorList); ok { - p.ErrorList = &e + if errlist, ok := err.(scanner.ErrorList); ok { + p.ErrorList = &errlist } else { - p.ErrorString = e.Error() + p.ErrorString = err.Error() } s, err := json.Marshal(p) if err != nil { diff --git a/src/cmd/go/internal/test/test.go b/src/cmd/go/internal/test/test.go index 916943904d..9309aa65ed 100644 --- a/src/cmd/go/internal/test/test.go +++ b/src/cmd/go/internal/test/test.go @@ -163,7 +163,7 @@ In addition to the build flags, the flags handled by 'go test' itself are: -o file Save a copy of the test binary to the named file. - The test still runs (unless -c or -i is specified). + The test still runs (unless -c is specified). If file ends in a slash or names an existing directory, the test is written to pkg.test in that directory. 
diff --git a/src/cmd/go/testdata/script/list_empty_importpath.txt b/src/cmd/go/testdata/script/list_empty_importpath.txt index fe4210322b..0960a7795d 100644 --- a/src/cmd/go/testdata/script/list_empty_importpath.txt +++ b/src/cmd/go/testdata/script/list_empty_importpath.txt @@ -1,15 +1,6 @@ ! go list all ! stderr 'panic' -[!GOOS:windows] [!GOOS:solaris] [!GOOS:freebsd] [!GOOS:openbsd] [!GOOS:netbsd] stderr 'invalid import path' -# #73976: Allow 'no errors' on Windows, Solaris, and BSD until issue -# is resolved to prevent flakes. 'no errors' is printed by -# empty scanner.ErrorList errors so that's probably where the -# message is coming from, though we don't know how. -[GOOS:windows] stderr 'invalid import path|no errors' -[GOOS:solaris] stderr 'invalid import path|no errors' -[GOOS:freebsd] stderr 'invalid import path|no errors' -[GOOS:openbsd] stderr 'invalid import path|no errors' -[GOOS:netbsd] stderr 'invalid import path|no errors' +stderr 'invalid import path' # go list produces a package for 'p' but not for '' go list -e all diff --git a/src/cmd/go/testdata/vcstest/git/legacytest.txt b/src/cmd/go/testdata/vcstest/git/legacytest.txt index 5846983cef..6465242d62 100644 --- a/src/cmd/go/testdata/vcstest/git/legacytest.txt +++ b/src/cmd/go/testdata/vcstest/git/legacytest.txt @@ -6,7 +6,7 @@ env GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME env GIT_COMMITTER_EMAIL=$GIT_AUTHOR_EMAIL git init -git branch -M master +git checkout -b master at 2018-07-17T12:41:39-04:00 cp x_cf92c7b.go x.go diff --git a/src/cmd/internal/bootstrap_test/overlaydir_test.go b/src/cmd/internal/bootstrap_test/overlaydir_test.go index 5812c453ac..bee3214b67 100644 --- a/src/cmd/internal/bootstrap_test/overlaydir_test.go +++ b/src/cmd/internal/bootstrap_test/overlaydir_test.go @@ -43,6 +43,9 @@ func overlayDir(dstRoot, srcRoot string) error { dstPath := filepath.Join(dstRoot, suffix) info, err := entry.Info() + if err != nil { + return err + } perm := info.Mode() & os.ModePerm if 
info.Mode()&os.ModeSymlink != 0 { info, err = os.Stat(srcPath) diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go index bc7504e5b1..036eda13bc 100644 --- a/src/cmd/link/link_test.go +++ b/src/cmd/link/link_test.go @@ -869,6 +869,9 @@ func TestFuncAlignOption(t *testing.T) { "_main.bar": false, "_main.baz": false} syms, err := f.Symbols() + if err != nil { + t.Errorf("failed to get symbols with err %v", err) + } for _, s := range syms { fn := s.Name if _, ok := fname[fn]; !ok { diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go index 45aed7909c..f1202c7a11 100644 --- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go +++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/doc.go @@ -80,6 +80,8 @@ or b.ResetTimer within the same function will also be removed. Caveats: The b.Loop() method is designed to prevent the compiler from optimizing away the benchmark loop, which can occasionally result in slower execution due to increased allocations in some specific cases. +Since its fix may change the performance of nanosecond-scale benchmarks, +bloop is disabled by default in the `go fix` analyzer suite; see golang/go#74967. # Analyzer any diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go index f97541d4b3..795f5b6c6b 100644 --- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go +++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/maps.go @@ -231,9 +231,28 @@ func mapsloop(pass *analysis.Pass) (any, error) { // Have: for k, v := range x { lhs = rhs } assign := rng.Body.List[0].(*ast.AssignStmt) + + // usesKV reports whether e references vars k or v. 
+ usesKV := func(e ast.Expr) bool { + k := info.Defs[rng.Key.(*ast.Ident)] + v := info.Defs[rng.Value.(*ast.Ident)] + for n := range ast.Preorder(e) { + if id, ok := n.(*ast.Ident); ok { + obj := info.Uses[id] + if obj != nil && // don't rely on k, v being non-nil + (obj == k || obj == v) { + return true + } + } + } + return false + } + if index, ok := assign.Lhs[0].(*ast.IndexExpr); ok && + len(assign.Lhs) == 1 && astutil.EqualSyntax(rng.Key, index.Index) && - astutil.EqualSyntax(rng.Value, assign.Rhs[0]) { + astutil.EqualSyntax(rng.Value, assign.Rhs[0]) && + !usesKV(index.X) { // reject (e.g.) f(k, v)[k] = v if tmap, ok := typeparams.CoreType(info.TypeOf(index.X)).(*types.Map); ok && types.Identical(info.TypeOf(index), info.TypeOf(rng.Value)) && // m[k], v types.Identical(tmap.Key(), info.TypeOf(rng.Key)) { diff --git a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go index 013ce79d6c..f09a2d26ca 100644 --- a/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go +++ b/src/cmd/vendor/golang.org/x/tools/go/analysis/passes/modernize/modernize.go @@ -34,7 +34,7 @@ var doc string var Suite = []*analysis.Analyzer{ AnyAnalyzer, // AppendClippedAnalyzer, // not nil-preserving! 
- BLoopAnalyzer, + // BLoopAnalyzer, // may skew benchmark results, see golang/go#74967 FmtAppendfAnalyzer, ForVarAnalyzer, MapsLoopAnalyzer, diff --git a/src/cmd/vendor/modules.txt b/src/cmd/vendor/modules.txt index 7c122cd9d1..9c179c4bcd 100644 --- a/src/cmd/vendor/modules.txt +++ b/src/cmd/vendor/modules.txt @@ -73,7 +73,7 @@ golang.org/x/text/internal/tag golang.org/x/text/language golang.org/x/text/transform golang.org/x/text/unicode/norm -# golang.org/x/tools v0.39.1-0.20251205000126-062ef7b6ced2 +# golang.org/x/tools v0.39.1-0.20251230210517-d44be789a05c ## explicit; go 1.24.0 golang.org/x/tools/cmd/bisect golang.org/x/tools/cover diff --git a/src/crypto/cipher/gcm_fips140v2.0_test.go b/src/crypto/cipher/gcm_fips140v1.26_test.go index d3a8ea5c63..9f17a497ca 100644 --- a/src/crypto/cipher/gcm_fips140v2.0_test.go +++ b/src/crypto/cipher/gcm_fips140v1.26_test.go @@ -18,10 +18,10 @@ import ( "testing" ) -func TestGCMNoncesFIPSV2(t *testing.T) { +func TestGCMNoncesFIPSV126(t *testing.T) { cryptotest.MustSupportFIPS140(t) if !fips140.Enabled { - cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestGCMNoncesFIPSV2$", "-test.v") + cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestGCMNoncesFIPSV126$", "-test.v") cmd.Env = append(cmd.Environ(), "GODEBUG=fips140=on") out, err := cmd.CombinedOutput() t.Logf("running with GODEBUG=fips140=on:\n%s", out) diff --git a/src/crypto/hpke/aead_fipsv1.0.go b/src/crypto/hpke/aead_fips140v1.0.go index 986126cbf9..986126cbf9 100644 --- a/src/crypto/hpke/aead_fipsv1.0.go +++ b/src/crypto/hpke/aead_fips140v1.0.go diff --git a/src/crypto/hpke/aead_fipsv2.0.go b/src/crypto/hpke/aead_fips140v1.26.go index 710eb1c08f..710eb1c08f 100644 --- a/src/crypto/hpke/aead_fipsv2.0.go +++ b/src/crypto/hpke/aead_fips140v1.26.go diff --git a/src/crypto/internal/fips140only/fips140only_test.go b/src/crypto/internal/fips140only/fips140only_test.go new file mode 100644 index 0000000000..96df536d56 --- /dev/null +++ 
b/src/crypto/internal/fips140only/fips140only_test.go @@ -0,0 +1,408 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fips140only_test + +import ( + "crypto" + "crypto/aes" + "crypto/cipher" + "crypto/des" + "crypto/dsa" + "crypto/ecdh" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/elliptic" + "crypto/hkdf" + "crypto/hmac" + "crypto/hpke" + "crypto/internal/cryptotest" + "crypto/internal/fips140" + "crypto/internal/fips140only" + "crypto/md5" + "crypto/mlkem" + "crypto/mlkem/mlkemtest" + "crypto/pbkdf2" + "crypto/rand" + "crypto/rc4" + "crypto/rsa" + "crypto/sha1" + "crypto/sha256" + _ "crypto/sha3" + _ "crypto/sha512" + "crypto/x509" + "encoding/pem" + "fmt" + "internal/godebug" + "internal/testenv" + "io" + "math/big" + "os" + "strings" + "testing" + + "golang.org/x/crypto/chacha20poly1305" +) + +func TestFIPS140Only(t *testing.T) { + cryptotest.MustSupportFIPS140(t) + if !fips140only.Enforced() { + cmd := testenv.Command(t, testenv.Executable(t), "-test.run=^TestFIPS140Only$", "-test.v") + cmd.Env = append(cmd.Environ(), "GODEBUG=fips140=only") + out, err := cmd.CombinedOutput() + t.Logf("running with GODEBUG=fips140=only:\n%s", out) + if err != nil { + t.Errorf("fips140=only subprocess failed: %v", err) + } + return + } + t.Run("cryptocustomrand=0", func(t *testing.T) { + t.Setenv("GODEBUG", os.Getenv("GODEBUG")+",cryptocustomrand=0") + testFIPS140Only(t) + }) + t.Run("cryptocustomrand=1", func(t *testing.T) { + t.Setenv("GODEBUG", os.Getenv("GODEBUG")+",cryptocustomrand=1") + testFIPS140Only(t) + }) +} + +func testFIPS140Only(t *testing.T) { + if !fips140only.Enforced() { + t.Fatal("FIPS 140-only mode not enforced") + } + t.Logf("GODEBUG=fips140=only enabled") + fips140.ResetServiceIndicator() + + aesBlock, err := aes.NewCipher(make([]byte, 16)) + if err != nil { + t.Fatal(err) + } + notAESBlock := blockWrap{aesBlock} + iv := 
make([]byte, aes.BlockSize) + + cipher.NewCBCEncrypter(aesBlock, iv) + expectPanic(t, func() { cipher.NewCBCEncrypter(notAESBlock, iv) }) + cipher.NewCBCDecrypter(aesBlock, iv) + expectPanic(t, func() { cipher.NewCBCDecrypter(notAESBlock, iv) }) + + expectPanic(t, func() { cipher.NewCFBEncrypter(aesBlock, iv) }) + expectPanic(t, func() { cipher.NewCFBDecrypter(aesBlock, iv) }) + + cipher.NewCTR(aesBlock, iv) + expectPanic(t, func() { cipher.NewCTR(notAESBlock, iv) }) + + expectPanic(t, func() { cipher.NewOFB(aesBlock, iv) }) + + expectErr(t, errRet2(cipher.NewGCM(aesBlock))) + expectErr(t, errRet2(cipher.NewGCMWithNonceSize(aesBlock, 12))) + expectErr(t, errRet2(cipher.NewGCMWithTagSize(aesBlock, 12))) + expectNoErr(t, errRet2(cipher.NewGCMWithRandomNonce(aesBlock))) + + expectErr(t, errRet2(des.NewCipher(make([]byte, 8)))) + expectErr(t, errRet2(des.NewTripleDESCipher(make([]byte, 24)))) + + expectErr(t, errRet2(rc4.NewCipher(make([]byte, 16)))) + + expectErr(t, errRet2(chacha20poly1305.New(make([]byte, chacha20poly1305.KeySize)))) + expectErr(t, errRet2(chacha20poly1305.NewX(make([]byte, chacha20poly1305.KeySize)))) + + expectPanic(t, func() { md5.New().Sum(nil) }) + expectErr(t, errRet2(md5.New().Write(make([]byte, 16)))) + expectPanic(t, func() { md5.Sum([]byte("foo")) }) + + expectPanic(t, func() { sha1.New().Sum(nil) }) + expectErr(t, errRet2(sha1.New().Write(make([]byte, 16)))) + expectPanic(t, func() { sha1.Sum([]byte("foo")) }) + + withApprovedHash(func(h crypto.Hash) { h.New().Sum(nil) }) + withNonApprovedHash(func(h crypto.Hash) { expectPanic(t, func() { h.New().Sum(nil) }) }) + + expectErr(t, errRet2(pbkdf2.Key(sha256.New, "password", make([]byte, 16), 1, 10))) + expectErr(t, errRet2(pbkdf2.Key(sha256.New, "password", make([]byte, 10), 1, 14))) + withNonApprovedHash(func(h crypto.Hash) { + expectErr(t, errRet2(pbkdf2.Key(h.New, "password", make([]byte, 16), 1, 14))) + }) + withApprovedHash(func(h crypto.Hash) { + expectNoErr(t, errRet2(pbkdf2.Key(h.New, 
"password", make([]byte, 16), 1, 14))) + }) + + expectPanic(t, func() { hmac.New(sha256.New, make([]byte, 10)) }) + withNonApprovedHash(func(h crypto.Hash) { + expectPanic(t, func() { hmac.New(h.New, make([]byte, 16)) }) + }) + withApprovedHash(func(h crypto.Hash) { hmac.New(h.New, make([]byte, 16)) }) + + expectErr(t, errRet2(hkdf.Key(sha256.New, make([]byte, 10), nil, "", 16))) + withNonApprovedHash(func(h crypto.Hash) { + expectErr(t, errRet2(hkdf.Key(h.New, make([]byte, 16), nil, "", 16))) + }) + withApprovedHash(func(h crypto.Hash) { + expectNoErr(t, errRet2(hkdf.Key(h.New, make([]byte, 16), nil, "", 16))) + }) + + expectErr(t, errRet2(hkdf.Extract(sha256.New, make([]byte, 10), nil))) + withNonApprovedHash(func(h crypto.Hash) { + expectErr(t, errRet2(hkdf.Extract(h.New, make([]byte, 16), nil))) + }) + withApprovedHash(func(h crypto.Hash) { + expectNoErr(t, errRet2(hkdf.Extract(h.New, make([]byte, 16), nil))) + }) + + expectErr(t, errRet2(hkdf.Expand(sha256.New, make([]byte, 10), "", 16))) + withNonApprovedHash(func(h crypto.Hash) { + expectErr(t, errRet2(hkdf.Expand(h.New, make([]byte, 16), "", 16))) + }) + withApprovedHash(func(h crypto.Hash) { + expectNoErr(t, errRet2(hkdf.Expand(h.New, make([]byte, 16), "", 16))) + }) + + expectErr(t, errRet2(rand.Prime(rand.Reader, 10))) + + expectErr(t, dsa.GenerateParameters(&dsa.Parameters{}, rand.Reader, dsa.L1024N160)) + expectErr(t, dsa.GenerateKey(&dsa.PrivateKey{}, rand.Reader)) + expectErr(t, errRet3(dsa.Sign(rand.Reader, &dsa.PrivateKey{}, make([]byte, 16)))) + expectPanic(t, func() { + dsa.Verify(&dsa.PublicKey{}, make([]byte, 16), big.NewInt(1), big.NewInt(1)) + }) + + expectErr(t, errRet2(ecdh.X25519().GenerateKey(rand.Reader))) + expectErr(t, errRet2(ecdh.X25519().NewPrivateKey(make([]byte, 32)))) + expectErr(t, errRet2(ecdh.X25519().NewPublicKey(make([]byte, 32)))) + for _, curve := range []ecdh.Curve{ecdh.P256(), ecdh.P384(), ecdh.P521()} { + expectErrIfCustomRand(t, 
errRet2(curve.GenerateKey(readerWrap{rand.Reader}))) + k, err := curve.GenerateKey(rand.Reader) + if err != nil { + t.Fatal(err) + } + expectNoErr(t, errRet2(curve.NewPrivateKey(k.Bytes()))) + expectNoErr(t, errRet2(curve.NewPublicKey(k.PublicKey().Bytes()))) + } + + for _, curve := range []elliptic.Curve{elliptic.P256(), elliptic.P384(), elliptic.P521()} { + expectErrIfCustomRand(t, errRet2(ecdsa.GenerateKey(curve, readerWrap{rand.Reader}))) + k, err := ecdsa.GenerateKey(curve, rand.Reader) + if err != nil { + t.Fatal(err) + } + + expectErrIfCustomRand(t, errRet2(k.Sign(readerWrap{rand.Reader}, make([]byte, 32), nil))) + expectErrIfCustomRand(t, errRet2(ecdsa.SignASN1(readerWrap{rand.Reader}, k, make([]byte, 32)))) + expectErrIfCustomRand(t, errRet3(ecdsa.Sign(readerWrap{rand.Reader}, k, make([]byte, 32)))) + expectNoErr(t, errRet2(k.Sign(rand.Reader, make([]byte, 32), nil))) + expectNoErr(t, errRet2(ecdsa.SignASN1(rand.Reader, k, make([]byte, 32)))) + expectNoErr(t, errRet3(ecdsa.Sign(rand.Reader, k, make([]byte, 32)))) + + withNonApprovedHash(func(h crypto.Hash) { + expectErr(t, errRet2(k.Sign(nil, make([]byte, h.Size()), h))) + }) + withApprovedHash(func(h crypto.Hash) { + expectNoErr(t, errRet2(k.Sign(nil, make([]byte, h.Size()), h))) + }) + } + customCurve := &elliptic.CurveParams{Name: "custom", P: big.NewInt(1)} + expectErr(t, errRet2(ecdsa.GenerateKey(customCurve, rand.Reader))) + + _, ed25519Key, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatal(err) + } + expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 32), crypto.Hash(0)))) + expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), crypto.SHA512))) + // ed25519ctx is not allowed (but ed25519ph with context is). 
+ expectErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 32), &ed25519.Options{ + Context: "test", + }))) + expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), &ed25519.Options{ + Hash: crypto.SHA512, Context: "test", + }))) + expectNoErr(t, errRet2(ed25519Key.Sign(nil, make([]byte, 64), &ed25519.Options{ + Hash: crypto.SHA512, + }))) + + expectErr(t, errRet2(rsa.GenerateMultiPrimeKey(rand.Reader, 3, 2048))) + expectErr(t, errRet2(rsa.GenerateKey(rand.Reader, 1024))) + expectErr(t, errRet2(rsa.GenerateKey(rand.Reader, 2049))) + expectErrIfCustomRand(t, errRet2(rsa.GenerateKey(readerWrap{rand.Reader}, 2048))) + rsaKey, err := rsa.GenerateKey(rand.Reader, 2048) + expectNoErr(t, err) + + smallKey := parseKey(testingKey(`-----BEGIN RSA TESTING KEY----- +MIICXQIBAAKBgQDMrln6XoAa3Rjts+kRi5obbP86qSf/562RcuDO+yMXeTLHfi4M +8ubyhoFY+UKBCGBLmmTO7ikbvQgdipkT3xVkU8nM3XTW4sxrnw0X5QXsl4PGlMo0 +5UufxYyQxe7bbjuwFz2XnN6Jz4orpOfO0s36/KVHj9lZRl+REpr/Jy+nJQIDAQAB +AoGAJ9WEwGO01cWSzOwXH2mGX/EKCQ4TsUuS7XwogU/B6BcXyVhmuPFq/ecsdDbq +ePc62mvdU6JpELNsyWcIXKQtYsRgJHxNS+KJkCQIq6YeiAWRG0XL6q+qVj+HtT8a +1Qrmul9ZBd23Y9wLF8pg/xWDQYvb8DPAb/xJ0e/KEBZcWU8CQQDXFCFCGpCfwyxY +Cq8G/3B94D9UYwk5mK6jRIH5m8LbaX9bKKetf8+If8TWVgeuiRjjN4WEQ78lPoSg +3Fsz2qs3AkEA85/JCudNUf2FnY+T6h1c/2SWekZiZ1NS4lCh/C7iYuAN3oa8zGkf +gjjR5e0+Z8rUAcZkTukxyLLaNqy6rs9GgwJAVR6pXvEGhcQHe7yWso1LpvWl+q7L +StkrXIBTdEb54j4pYhl/6wFnUB1I+I7JsYCeseYaWFM7hfDtKoCrM6V6FwJBANxh +KmfmnJcSkw/YlaEuNrYAs+6gRNvbEBsRfba2Yqu2qlUl5Ruz7IDMDXPEjLMvU2DX +ql2HrTU0NRlIXwdLESkCQQDGJ54H6WK1eE1YvtxCaLm28zmogcFlvc21pym+PpM1 +bXVL8iKLrG91IYQByUHZIn3WVAd2bfi4MfKagRt0ggd4 +-----END RSA TESTING KEY-----`)) + + expectNoErr(t, errRet2(rsaKey.Sign(rand.Reader, make([]byte, 32), crypto.SHA256))) + expectErr(t, errRet2(smallKey.Sign(rand.Reader, make([]byte, 32), crypto.SHA256))) + expectErr(t, errRet2(rsaKey.Sign(rand.Reader, make([]byte, 20), crypto.SHA1))) + // rand is always ignored for PKCS1v15 signing + expectNoErr(t, 
errRet2(rsaKey.Sign(readerWrap{rand.Reader}, make([]byte, 32), crypto.SHA256))) + + sigPKCS1v15, err := rsa.SignPKCS1v15(rand.Reader, rsaKey, crypto.SHA256, make([]byte, 32)) + expectNoErr(t, err) + expectErr(t, errRet2(rsa.SignPKCS1v15(rand.Reader, smallKey, crypto.SHA256, make([]byte, 32)))) + expectErr(t, errRet2(rsa.SignPKCS1v15(rand.Reader, rsaKey, crypto.SHA1, make([]byte, 20)))) + // rand is always ignored for PKCS1v15 signing + expectNoErr(t, errRet2(rsa.SignPKCS1v15(readerWrap{rand.Reader}, rsaKey, crypto.SHA256, make([]byte, 32)))) + + expectNoErr(t, rsa.VerifyPKCS1v15(&rsaKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPKCS1v15)) + expectErr(t, rsa.VerifyPKCS1v15(&smallKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPKCS1v15)) + expectErr(t, rsa.VerifyPKCS1v15(&rsaKey.PublicKey, crypto.SHA1, make([]byte, 20), sigPKCS1v15)) + + sigPSS, err := rsa.SignPSS(rand.Reader, rsaKey, crypto.SHA256, make([]byte, 32), nil) + expectNoErr(t, err) + expectErr(t, errRet2(rsa.SignPSS(rand.Reader, smallKey, crypto.SHA256, make([]byte, 32), nil))) + expectErr(t, errRet2(rsa.SignPSS(rand.Reader, rsaKey, crypto.SHA1, make([]byte, 20), nil))) + expectErr(t, errRet2(rsa.SignPSS(readerWrap{rand.Reader}, rsaKey, crypto.SHA256, make([]byte, 32), nil))) + + expectNoErr(t, rsa.VerifyPSS(&rsaKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPSS, nil)) + expectErr(t, rsa.VerifyPSS(&smallKey.PublicKey, crypto.SHA256, make([]byte, 32), sigPSS, nil)) + expectErr(t, rsa.VerifyPSS(&rsaKey.PublicKey, crypto.SHA1, make([]byte, 20), sigPSS, nil)) + + k, err := mlkem.GenerateKey768() + expectNoErr(t, err) + expectErr(t, errRet3(mlkemtest.Encapsulate768(k.EncapsulationKey(), make([]byte, 32)))) + k1024, err := mlkem.GenerateKey1024() + expectNoErr(t, err) + expectErr(t, errRet3(mlkemtest.Encapsulate1024(k1024.EncapsulationKey(), make([]byte, 32)))) + + for _, kem := range []hpke.KEM{ + hpke.DHKEM(ecdh.P256()), + hpke.DHKEM(ecdh.P384()), + hpke.DHKEM(ecdh.P521()), + hpke.MLKEM768(), + 
hpke.MLKEM1024(), + hpke.MLKEM768P256(), + hpke.MLKEM1024P384(), + hpke.MLKEM768X25519(), // allowed as hybrid + } { + t.Run(fmt.Sprintf("HKPE KEM %04x", kem.ID()), func(t *testing.T) { + k, err := kem.GenerateKey() + expectNoErr(t, err) + expectNoErr(t, errRet2(kem.DeriveKeyPair(make([]byte, 64)))) + kb, err := k.Bytes() + expectNoErr(t, err) + expectNoErr(t, errRet2(kem.NewPrivateKey(kb))) + expectNoErr(t, errRet2(kem.NewPublicKey(k.PublicKey().Bytes()))) + if fips140.Version() == "v1.0.0" { + t.Skip("FIPS 140-3 Module v1.0.0 does not provide HPKE GCM modes") + } + c, err := hpke.Seal(k.PublicKey(), hpke.HKDFSHA256(), hpke.AES128GCM(), nil, nil) + expectNoErr(t, err) + _, err = hpke.Open(k, hpke.HKDFSHA256(), hpke.AES128GCM(), nil, c) + expectNoErr(t, err) + }) + } + expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).GenerateKey())) + expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).DeriveKeyPair(make([]byte, 64)))) + expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).NewPrivateKey(make([]byte, 32)))) + expectErr(t, errRet2(hpke.DHKEM(ecdh.X25519()).NewPublicKey(make([]byte, 32)))) + hpkeK, err := hpke.MLKEM768().GenerateKey() + expectNoErr(t, err) + expectErr(t, errRet2(hpke.Seal(hpkeK.PublicKey(), hpke.HKDFSHA256(), hpke.ChaCha20Poly1305(), nil, nil))) + expectErr(t, errRet2(hpke.Open(hpkeK, hpke.HKDFSHA256(), hpke.ChaCha20Poly1305(), nil, make([]byte, 2000)))) + + // fips140=only mode should prevent any operation that would make the FIPS + // 140-3 module set its service indicator to false. 
+ if !fips140.ServiceIndicator() { + t.Errorf("service indicator not set") + } +} + +type blockWrap struct { + cipher.Block +} + +type readerWrap struct { + io.Reader +} + +func withApprovedHash(f func(crypto.Hash)) { + f(crypto.SHA224) + f(crypto.SHA256) + f(crypto.SHA384) + f(crypto.SHA512) + f(crypto.SHA3_224) + f(crypto.SHA3_256) + f(crypto.SHA3_384) + f(crypto.SHA3_512) + f(crypto.SHA512_224) + f(crypto.SHA512_256) +} + +func withNonApprovedHash(f func(crypto.Hash)) { + f(crypto.MD5) + f(crypto.SHA1) +} + +func expectPanic(t *testing.T, f func()) { + t.Helper() + defer func() { + t.Helper() + if err := recover(); err == nil { + t.Errorf("expected panic") + } else { + if s, ok := err.(string); !ok || !strings.Contains(s, "FIPS 140-only") { + t.Errorf("unexpected panic: %v", err) + } + } + }() + f() +} + +var cryptocustomrand = godebug.New("cryptocustomrand") + +func expectErr(t *testing.T, err error) { + t.Helper() + if err == nil { + t.Errorf("expected error") + } else if !strings.Contains(err.Error(), "FIPS 140-only") { + t.Errorf("unexpected error: %v", err) + } +} + +func expectNoErr(t *testing.T, err error) { + t.Helper() + if err != nil { + t.Errorf("unexpected error: %v", err) + } +} + +func expectErrIfCustomRand(t *testing.T, err error) { + t.Helper() + if cryptocustomrand.Value() == "1" { + expectErr(t, err) + } else { + expectNoErr(t, err) + } +} + +func errRet2[T any](_ T, err error) error { + return err +} + +func errRet3[T any](_, _ T, err error) error { + return err +} + +func testingKey(s string) string { return strings.ReplaceAll(s, "TESTING KEY", "PRIVATE KEY") } + +func parseKey(s string) *rsa.PrivateKey { + p, _ := pem.Decode([]byte(s)) + k, err := x509.ParsePKCS1PrivateKey(p.Bytes) + if err != nil { + panic(err) + } + return k +} diff --git a/src/crypto/internal/fips140test/acvp_capabilities_fips140v2.0.json b/src/crypto/internal/fips140test/acvp_capabilities_fips140v1.26.json index 33c8aa235b..33c8aa235b 100644 --- 
a/src/crypto/internal/fips140test/acvp_capabilities_fips140v2.0.json +++ b/src/crypto/internal/fips140test/acvp_capabilities_fips140v1.26.json diff --git a/src/crypto/internal/fips140test/acvp_fips140v2.0_test.go b/src/crypto/internal/fips140test/acvp_fips140v1.26_test.go index e9ef91537a..10a44f1492 100644 --- a/src/crypto/internal/fips140test/acvp_fips140v2.0_test.go +++ b/src/crypto/internal/fips140test/acvp_fips140v1.26_test.go @@ -12,10 +12,10 @@ import ( "fmt" ) -//go:embed acvp_capabilities_fips140v2.0.json +//go:embed acvp_capabilities_fips140v1.26.json var capabilitiesJson []byte -var testConfigFile = "acvp_test_fips140v2.0.config.json" +var testConfigFile = "acvp_test_fips140v1.26.config.json" func init() { commands["ML-DSA-44/keyGen"] = cmdMlDsaKeyGenAft(mldsa.NewPrivateKey44) diff --git a/src/crypto/internal/fips140test/acvp_test_fips140v2.0.config.json b/src/crypto/internal/fips140test/acvp_test_fips140v1.26.config.json index 51c76d9288..51c76d9288 100644 --- a/src/crypto/internal/fips140test/acvp_test_fips140v2.0.config.json +++ b/src/crypto/internal/fips140test/acvp_test_fips140v1.26.config.json diff --git a/src/crypto/internal/fips140test/cast_fips140v1.0_test.go b/src/crypto/internal/fips140test/cast_fips140v1.0_test.go index 4780966208..b9ddfe4d8b 100644 --- a/src/crypto/internal/fips140test/cast_fips140v1.0_test.go +++ b/src/crypto/internal/fips140test/cast_fips140v1.0_test.go @@ -6,4 +6,4 @@ package fipstest -func fips140v2Conditionals() {} +func fips140v126Conditionals() {} diff --git a/src/crypto/internal/fips140test/cast_fips140v2.0_test.go b/src/crypto/internal/fips140test/cast_fips140v1.26_test.go index 06e0513a7f..ef79068c38 100644 --- a/src/crypto/internal/fips140test/cast_fips140v2.0_test.go +++ b/src/crypto/internal/fips140test/cast_fips140v1.26_test.go @@ -8,7 +8,7 @@ package fipstest import "crypto/internal/fips140/mldsa" -func fips140v2Conditionals() { +func fips140v126Conditionals() { // ML-DSA sign and verify PCT kMLDSA := 
mldsa.GenerateKey44() // ML-DSA-44 diff --git a/src/crypto/internal/fips140test/cast_test.go b/src/crypto/internal/fips140test/cast_test.go index 5a80006622..817dcb9a35 100644 --- a/src/crypto/internal/fips140test/cast_test.go +++ b/src/crypto/internal/fips140test/cast_test.go @@ -115,7 +115,7 @@ func TestAllCASTs(t *testing.T) { // TestConditionals causes the conditional CASTs and PCTs to be invoked. func TestConditionals(t *testing.T) { - fips140v2Conditionals() + fips140v126Conditionals() // ML-KEM PCT kMLKEM, err := mlkem.GenerateKey768() if err != nil { diff --git a/src/crypto/internal/rand/rand_fipsv1.0.go b/src/crypto/internal/rand/rand_fips140v1.0.go index 29eba7e0bc..29eba7e0bc 100644 --- a/src/crypto/internal/rand/rand_fipsv1.0.go +++ b/src/crypto/internal/rand/rand_fips140v1.0.go diff --git a/src/crypto/internal/rand/rand_fipsv2.0.go b/src/crypto/internal/rand/rand_fips140v1.26.go index 0dc18e7883..0dc18e7883 100644 --- a/src/crypto/internal/rand/rand_fipsv2.0.go +++ b/src/crypto/internal/rand/rand_fips140v1.26.go diff --git a/src/crypto/tls/conn.go b/src/crypto/tls/conn.go index c04c7a506e..a840125a45 100644 --- a/src/crypto/tls/conn.go +++ b/src/crypto/tls/conn.go @@ -224,6 +224,9 @@ func (hc *halfConn) changeCipherSpec() error { return nil } +// setTrafficSecret sets the traffic secret for the given encryption level. setTrafficSecret +// should not be called directly, but rather through the Conn setWriteTrafficSecret and +// setReadTrafficSecret wrapper methods. 
func (hc *halfConn) setTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) { hc.trafficSecret = secret hc.level = level @@ -1339,9 +1342,6 @@ func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error { return c.in.setErrorLocked(c.sendAlert(alertInternalError)) } - newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret) - c.in.setTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret) - if keyUpdate.updateRequested { c.out.Lock() defer c.out.Unlock() @@ -1359,7 +1359,12 @@ func (c *Conn) handleKeyUpdate(keyUpdate *keyUpdateMsg) error { } newSecret := cipherSuite.nextTrafficSecret(c.out.trafficSecret) - c.out.setTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret) + c.setWriteTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret) + } + + newSecret := cipherSuite.nextTrafficSecret(c.in.trafficSecret) + if err := c.setReadTrafficSecret(cipherSuite, QUICEncryptionLevelInitial, newSecret); err != nil { + return err } return nil @@ -1576,7 +1581,9 @@ func (c *Conn) handshakeContext(ctx context.Context) (ret error) { // Provide the 1-RTT read secret now that the handshake is complete. // The QUIC layer MUST NOT decrypt 1-RTT packets prior to completing // the handshake (RFC 9001, Section 5.7). - c.quicSetReadSecret(QUICEncryptionLevelApplication, c.cipherSuite, c.in.trafficSecret) + if err := c.quicSetReadSecret(QUICEncryptionLevelApplication, c.cipherSuite, c.in.trafficSecret); err != nil { + return err + } } else { c.out.Lock() a, ok := errors.AsType[alert](c.out.err) @@ -1672,3 +1679,25 @@ func (c *Conn) VerifyHostname(host string) error { } return c.peerCertificates[0].VerifyHostname(host) } + +// setReadTrafficSecret sets the read traffic secret for the given encryption level. If +// being called at the same time as setWriteTrafficSecret, the caller must ensure the call +// to setWriteTrafficSecret happens first so any alerts are sent at the write level. 
+func (c *Conn) setReadTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) error { + // Ensure that there are no buffered handshake messages before changing the + // read keys, since that can cause messages to be parsed that were encrypted + // using old keys which are no longer appropriate. + if c.hand.Len() != 0 { + c.sendAlert(alertUnexpectedMessage) + return errors.New("tls: handshake buffer not empty before setting read traffic secret") + } + c.in.setTrafficSecret(suite, level, secret) + return nil +} + +// setWriteTrafficSecret sets the write traffic secret for the given encryption level. If +// being called at the same time as setReadTrafficSecret, the caller must ensure the call +// to setWriteTrafficSecret happens first so any alerts are sent at the write level. +func (c *Conn) setWriteTrafficSecret(suite *cipherSuiteTLS13, level QUICEncryptionLevel, secret []byte) { + c.out.setTrafficSecret(suite, level, secret) +} diff --git a/src/crypto/tls/handshake_client_tls13.go b/src/crypto/tls/handshake_client_tls13.go index e696bd3a13..77a24b4a78 100644 --- a/src/crypto/tls/handshake_client_tls13.go +++ b/src/crypto/tls/handshake_client_tls13.go @@ -490,16 +490,17 @@ func (hs *clientHandshakeStateTLS13) establishHandshakeKeys() error { handshakeSecret := earlySecret.HandshakeSecret(sharedKey) clientSecret := handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript) - c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret) + c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret) serverSecret := handshakeSecret.ServerHandshakeTrafficSecret(hs.transcript) - c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret) + if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret); err != nil { + return err + } if c.quic != nil { - if c.hand.Len() != 0 { - c.sendAlert(alertUnexpectedMessage) - } c.quicSetWriteSecret(QUICEncryptionLevelHandshake, 
hs.suite.id, clientSecret) - c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret) + if err := c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret); err != nil { + return err + } } err = c.config.writeKeyLog(keyLogLabelClientHandshake, hs.hello.random, clientSecret) @@ -710,7 +711,9 @@ func (hs *clientHandshakeStateTLS13) readServerFinished() error { hs.trafficSecret = hs.masterSecret.ClientApplicationTrafficSecret(hs.transcript) serverSecret := hs.masterSecret.ServerApplicationTrafficSecret(hs.transcript) - c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret) + if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret); err != nil { + return err + } err = c.config.writeKeyLog(keyLogLabelClientTraffic, hs.hello.random, hs.trafficSecret) if err != nil { @@ -813,16 +816,13 @@ func (hs *clientHandshakeStateTLS13) sendClientFinished() error { return err } - c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret) + c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret) if !c.config.SessionTicketsDisabled && c.config.ClientSessionCache != nil { c.resumptionSecret = hs.masterSecret.ResumptionMasterSecret(hs.transcript) } if c.quic != nil { - if c.hand.Len() != 0 { - c.sendAlert(alertUnexpectedMessage) - } c.quicSetWriteSecret(QUICEncryptionLevelApplication, hs.suite.id, hs.trafficSecret) } diff --git a/src/crypto/tls/handshake_server_tls13.go b/src/crypto/tls/handshake_server_tls13.go index 3bed1359a3..b066924e29 100644 --- a/src/crypto/tls/handshake_server_tls13.go +++ b/src/crypto/tls/handshake_server_tls13.go @@ -410,7 +410,9 @@ func (hs *serverHandshakeStateTLS13) checkForResumption() error { return err } earlyTrafficSecret := hs.earlySecret.ClientEarlyTrafficSecret(transcript) - c.quicSetReadSecret(QUICEncryptionLevelEarly, hs.suite.id, earlyTrafficSecret) + if err := 
c.quicSetReadSecret(QUICEncryptionLevelEarly, hs.suite.id, earlyTrafficSecret); err != nil { + return err + } } c.didResume = true @@ -514,6 +516,14 @@ func (hs *serverHandshakeStateTLS13) sendDummyChangeCipherSpec() error { func (hs *serverHandshakeStateTLS13) doHelloRetryRequest(selectedGroup CurveID) (*keyShare, error) { c := hs.c + // Make sure the client didn't send extra handshake messages alongside + // their initial client_hello. If they sent two client_hello messages, + // we will consume the second before they respond to the server_hello. + if c.hand.Len() != 0 { + c.sendAlert(alertUnexpectedMessage) + return nil, errors.New("tls: handshake buffer not empty before HelloRetryRequest") + } + // The first ClientHello gets double-hashed into the transcript upon a // HelloRetryRequest. See RFC 8446, Section 4.4.1. if err := transcriptMsg(hs.clientHello, hs.transcript); err != nil { @@ -733,17 +743,18 @@ func (hs *serverHandshakeStateTLS13) sendServerParameters() error { } hs.handshakeSecret = earlySecret.HandshakeSecret(hs.sharedKey) - clientSecret := hs.handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript) - c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret) serverSecret := hs.handshakeSecret.ServerHandshakeTrafficSecret(hs.transcript) - c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret) + c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, serverSecret) + clientSecret := hs.handshakeSecret.ClientHandshakeTrafficSecret(hs.transcript) + if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelHandshake, clientSecret); err != nil { + return err + } if c.quic != nil { - if c.hand.Len() != 0 { - c.sendAlert(alertUnexpectedMessage) - } c.quicSetWriteSecret(QUICEncryptionLevelHandshake, hs.suite.id, serverSecret) - c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, clientSecret) + if err := c.quicSetReadSecret(QUICEncryptionLevelHandshake, hs.suite.id, clientSecret); err != nil { 
+ return err + } } err := c.config.writeKeyLog(keyLogLabelClientHandshake, hs.clientHello.random, clientSecret) @@ -887,13 +898,9 @@ func (hs *serverHandshakeStateTLS13) sendServerFinished() error { hs.trafficSecret = hs.masterSecret.ClientApplicationTrafficSecret(hs.transcript) serverSecret := hs.masterSecret.ServerApplicationTrafficSecret(hs.transcript) - c.out.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret) + c.setWriteTrafficSecret(hs.suite, QUICEncryptionLevelApplication, serverSecret) if c.quic != nil { - if c.hand.Len() != 0 { - // TODO: Handle this in setTrafficSecret? - c.sendAlert(alertUnexpectedMessage) - } c.quicSetWriteSecret(QUICEncryptionLevelApplication, hs.suite.id, serverSecret) } @@ -1123,7 +1130,9 @@ func (hs *serverHandshakeStateTLS13) readClientFinished() error { return errors.New("tls: invalid client finished hash") } - c.in.setTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret) + if err := c.setReadTrafficSecret(hs.suite, QUICEncryptionLevelApplication, hs.trafficSecret); err != nil { + return err + } return nil } diff --git a/src/crypto/tls/handshake_test.go b/src/crypto/tls/handshake_test.go index 6e15459a9a..9cea8182d0 100644 --- a/src/crypto/tls/handshake_test.go +++ b/src/crypto/tls/handshake_test.go @@ -7,6 +7,7 @@ package tls import ( "bufio" "bytes" + "context" "crypto/ed25519" "crypto/x509" "encoding/hex" @@ -638,3 +639,142 @@ var clientEd25519KeyPEM = testingKey(` -----BEGIN TESTING KEY----- MC4CAQAwBQYDK2VwBCIEINifzf07d9qx3d44e0FSbV4mC/xQxT644RRbpgNpin7I -----END TESTING KEY-----`) + +func TestServerHelloTrailingMessage(t *testing.T) { + // In TLS 1.3 the change cipher spec message is optional. If a CCS message + // is not sent, after reading the ServerHello, the read traffic secret is + // set, and all following messages must be encrypted. 
If the server sends + // additional unencrypted messages in a record with the ServerHello, the + // client must either fail or ignore the additional messages. + + c, s := localPipe(t) + go func() { + ctx := context.Background() + srv := Server(s, testConfig) + clientHello, _, err := srv.readClientHello(ctx) + if err != nil { + testFatal(t, err) + } + + hs := serverHandshakeStateTLS13{ + c: srv, + ctx: ctx, + clientHello: clientHello, + } + if err := hs.processClientHello(); err != nil { + testFatal(t, err) + } + if err := transcriptMsg(hs.clientHello, hs.transcript); err != nil { + testFatal(t, err) + } + + record, err := concatHandshakeMessages(hs.hello, &encryptedExtensionsMsg{alpnProtocol: "h2"}) + if err != nil { + testFatal(t, err) + } + + if _, err := s.Write(record); err != nil { + testFatal(t, err) + } + srv.Close() + }() + + cli := Client(c, testConfig) + expectedErr := "tls: handshake buffer not empty before setting read traffic secret" + if err := cli.Handshake(); err == nil { + t.Fatal("expected error from incomplete handshake, got nil") + } else if err.Error() != expectedErr { + t.Fatalf("expected error %q, got %q", expectedErr, err.Error()) + } +} + +func TestClientHelloTrailingMessage(t *testing.T) { + // Same as TestServerHelloTrailingMessage but for the client side. 
+ + c, s := localPipe(t) + go func() { + cli := Client(c, testConfig) + + hello, _, _, err := cli.makeClientHello() + if err != nil { + testFatal(t, err) + } + + record, err := concatHandshakeMessages(hello, &certificateMsgTLS13{}) + if err != nil { + testFatal(t, err) + } + + if _, err := c.Write(record); err != nil { + testFatal(t, err) + } + cli.Close() + }() + + srv := Server(s, testConfig) + expectedErr := "tls: handshake buffer not empty before setting read traffic secret" + if err := srv.Handshake(); err == nil { + t.Fatal("expected error from incomplete handshake, got nil") + } else if err.Error() != expectedErr { + t.Fatalf("expected error %q, got %q", expectedErr, err.Error()) + } +} + +func TestDoubleClientHelloHRR(t *testing.T) { + // If a client sends two ClientHello messages in a single record, and the + // server sends a HRR after reading the first ClientHello, the server must + // either fail or ignore the trailing ClientHello. + + c, s := localPipe(t) + + go func() { + cli := Client(c, testConfig) + + hello, _, _, err := cli.makeClientHello() + if err != nil { + testFatal(t, err) + } + hello.keyShares = nil + + record, err := concatHandshakeMessages(hello, hello) + if err != nil { + testFatal(t, err) + } + + if _, err := c.Write(record); err != nil { + testFatal(t, err) + } + cli.Close() + }() + + srv := Server(s, testConfig) + expectedErr := "tls: handshake buffer not empty before HelloRetryRequest" + if err := srv.Handshake(); err == nil { + t.Fatal("expected error from incomplete handshake, got nil") + } else if err.Error() != expectedErr { + t.Fatalf("expected error %q, got %q", expectedErr, err.Error()) + } +} + +// concatHandshakeMessages marshals and concatenates the given handshake +// messages into a single record. 
+func concatHandshakeMessages(msgs ...handshakeMessage) ([]byte, error) { + var marshalled []byte + for _, msg := range msgs { + data, err := msg.marshal() + if err != nil { + return nil, err + } + marshalled = append(marshalled, data...) + } + m := len(marshalled) + outBuf := make([]byte, recordHeaderLen) + outBuf[0] = byte(recordTypeHandshake) + vers := VersionTLS12 + outBuf[1] = byte(vers >> 8) + outBuf[2] = byte(vers) + outBuf[3] = byte(m >> 8) + outBuf[4] = byte(m) + outBuf = append(outBuf, marshalled...) + return outBuf, nil +} diff --git a/src/crypto/tls/quic.go b/src/crypto/tls/quic.go index b3f95dbb18..76b7eb2cbd 100644 --- a/src/crypto/tls/quic.go +++ b/src/crypto/tls/quic.go @@ -402,13 +402,22 @@ func (c *Conn) quicReadHandshakeBytes(n int) error { return nil } -func (c *Conn) quicSetReadSecret(level QUICEncryptionLevel, suite uint16, secret []byte) { +func (c *Conn) quicSetReadSecret(level QUICEncryptionLevel, suite uint16, secret []byte) error { + // Ensure that there are no buffered handshake messages before changing the + // read keys, since that can cause messages to be parsed that were encrypted + // using old keys which are no longer appropriate. + // TODO(roland): we should merge this check with the similar one in setReadTrafficSecret. 
+ if c.hand.Len() != 0 { + c.sendAlert(alertUnexpectedMessage) + return errors.New("tls: handshake buffer not empty before setting read traffic secret") + } c.quic.events = append(c.quic.events, QUICEvent{ Kind: QUICSetReadSecret, Level: level, Suite: suite, Data: secret, }) + return nil } func (c *Conn) quicSetWriteSecret(level QUICEncryptionLevel, suite uint16, secret []byte) { diff --git a/src/debug/pe/file.go b/src/debug/pe/file.go index ed63a11cb6..91b7d1dca1 100644 --- a/src/debug/pe/file.go +++ b/src/debug/pe/file.go @@ -379,7 +379,11 @@ func (f *File) ImportedSymbols() ([]string, error) { } // seek to the virtual address specified in the import data directory - d = d[idd.VirtualAddress-ds.VirtualAddress:] + seek := idd.VirtualAddress - ds.VirtualAddress + if seek >= uint32(len(d)) { + return nil, errors.New("optional header data directory virtual size doesn't fit within data seek") + } + d = d[seek:] // start decoding the import directory var ida []ImportDirectory @@ -408,9 +412,16 @@ func (f *File) ImportedSymbols() ([]string, error) { dt.dll, _ = getString(names, int(dt.Name-ds.VirtualAddress)) d, _ = ds.Data() // seek to OriginalFirstThunk - d = d[dt.OriginalFirstThunk-ds.VirtualAddress:] + seek := dt.OriginalFirstThunk - ds.VirtualAddress + if seek >= uint32(len(d)) { + return nil, errors.New("import directory original first thunk doesn't fit within data seek") + } + d = d[seek:] for len(d) > 0 { if pe64 { // 64bit + if len(d) < 8 { + return nil, errors.New("thunk parsing needs at least 8-bytes") + } va := binary.LittleEndian.Uint64(d[0:8]) d = d[8:] if va == 0 { @@ -423,6 +434,9 @@ func (f *File) ImportedSymbols() ([]string, error) { all = append(all, fn+":"+dt.dll) } } else { // 32bit + if len(d) <= 4 { + return nil, errors.New("thunk parsing needs at least 5-bytes") + } va := binary.LittleEndian.Uint32(d[0:4]) d = d[4:] if va == 0 { diff --git a/src/encoding/gob/doc.go b/src/encoding/gob/doc.go index c746806887..390f25088e 100644 --- 
a/src/encoding/gob/doc.go +++ b/src/encoding/gob/doc.go @@ -153,16 +153,16 @@ are transmitted, even if all the elements are zero. Structs are sent as a sequence of (field number, field value) pairs. The field value is sent using the standard gob encoding for its type, recursively. If a -field has the zero value for its type (except for arrays; see above), it is omitted -from the transmission. The field number is defined by the type of the encoded -struct: the first field of the encoded type is field 0, the second is field 1, -etc. When encoding a value, the field numbers are delta encoded for efficiency -and the fields are always sent in order of increasing field number; the deltas are -therefore unsigned. The initialization for the delta encoding sets the field -number to -1, so an unsigned integer field 0 with value 7 is transmitted as unsigned -delta = 1, unsigned value = 7 or (01 07). Finally, after all the fields have been -sent a terminating mark denotes the end of the struct. That mark is a delta=0 -value, which has representation (00). +field has the zero value for its type (except for arrays; see above) or it's a +pointer to a zero value, it is omitted from the transmission. The field number +is defined by the type of the encoded struct: the first field of the encoded type +is field 0, the second is field 1, etc. When encoding a value, the field numbers +are delta encoded for efficiency and the fields are always sent in order of +increasing field number; the deltas are therefore unsigned. The initialization +for the delta encoding sets the field number to -1, so an unsigned integer field 0 +with value 7 is transmitted as unsigned delta = 1, unsigned value = 7 or (01 07). +Finally, after all the fields have been sent a terminating mark denotes the end +of the struct. That mark is a delta=0 value, which has representation (00). 
Interface types are not checked for compatibility; all interface types are treated, for transmission, as members of a single "interface" type, analogous to diff --git a/src/errors/join.go b/src/errors/join.go index 08a79867c6..730bf7043c 100644 --- a/src/errors/join.go +++ b/src/errors/join.go @@ -27,16 +27,6 @@ func Join(errs ...error) error { if n == 0 { return nil } - if n == 1 { - for _, err := range errs { - if _, ok := err.(interface { - Unwrap() []error - }); ok { - return err - } - } - } - e := &joinError{ errs: make([]error, 0, n), } diff --git a/src/errors/join_test.go b/src/errors/join_test.go index 439b372ca0..8ee4d7f77b 100644 --- a/src/errors/join_test.go +++ b/src/errors/join_test.go @@ -25,6 +25,7 @@ func TestJoinReturnsNil(t *testing.T) { func TestJoin(t *testing.T) { err1 := errors.New("err1") err2 := errors.New("err2") + merr := multiErr{errors.New("err3")} for _, test := range []struct { errs []error want []error @@ -37,6 +38,9 @@ func TestJoin(t *testing.T) { }, { errs: []error{err1, nil, err2}, want: []error{err1, err2}, + }, { + errs: []error{merr}, + want: []error{merr}, }} { got := errors.Join(test.errs...).(interface{ Unwrap() []error }).Unwrap() if !reflect.DeepEqual(got, test.want) { @@ -70,37 +74,3 @@ func TestJoinErrorMethod(t *testing.T) { } } } - -func BenchmarkJoin(b *testing.B) { - for _, bb := range []struct { - name string - errs []error - }{ - { - name: "no error", - }, - { - name: "single non-nil error", - errs: []error{errors.New("err")}, - }, - { - name: "multiple errors", - errs: []error{errors.New("err"), errors.New("newerr"), errors.New("newerr2")}, - }, - { - name: "unwrappable single error", - errs: []error{errors.Join(errors.New("err"))}, - }, - { - name: "nil first error", - errs: []error{nil, errors.New("newerr")}, - }, - } { - b.Run(bb.name, func(b *testing.B) { - b.ReportAllocs() - for i := 0; i < b.N; i++ { - _ = errors.Join(bb.errs...) 
- } - }) - } -} diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go index 0e7de3eb78..2569e73c7c 100644 --- a/src/go/doc/comment_test.go +++ b/src/go/doc/comment_test.go @@ -24,12 +24,12 @@ func TestComment(t *testing.T) { pkg := New(pkgs["pkgdoc"], "testdata/pkgdoc", 0) var ( - input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" - wantHTML = `<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods. <a href="#I.F">I.F</a> is an interface method and [I.V] is a broken link.` + "\n" - wantOldHTML = "<p>[T] and [U] are <i>types</i>, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" - wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods. [I.F](#I.F) is an interface method and \\[I.V] is a broken link.\n" - wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.M1 and G.M2 are generic methods. I.F is an interface\nmethod and [I.V] is a broken link.\n" - wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n[I.F] is an interface method and [I.V] is a broken link.\n" + input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. 
[rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" + wantHTML = `<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.X">G.X</a> is a field, <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods. <a href="#I.F">I.F</a> is an interface method and [I.V] is a broken link.` + "\n" + wantOldHTML = "<p>[T] and [U] are <i>types</i>, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" + wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.X](#G.X) is a field, [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods. [I.F](#I.F) is an interface method and \\[I.V] is a broken link.\n" + wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.X is a field, G.M1 and G.M2 are generic methods.\nI.F is an interface method and [I.V] is a broken link.\n" + wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.X] is a field, [G.M1] and [G.M2]\nare generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" wantSynopsis = "T and U are types, and T.M is a method, but [V] is a broken link." wantOldSynopsis = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link." 
) diff --git a/src/go/doc/doc.go b/src/go/doc/doc.go index 0c23f1a46c..8c786896fd 100644 --- a/src/go/doc/doc.go +++ b/src/go/doc/doc.go @@ -168,6 +168,7 @@ func (p *Package) collectTypes(types []*Type) { p.collectFuncs(t.Funcs) p.collectFuncs(t.Methods) p.collectInterfaceMethods(t) + p.collectStructFields(t) } } @@ -212,6 +213,24 @@ func (p *Package) collectInterfaceMethods(t *Type) { } } +func (p *Package) collectStructFields(t *Type) { + for _, s := range t.Decl.Specs { + spec, ok := s.(*ast.TypeSpec) + if !ok { + continue + } + list, isStruct := fields(spec.Type) + if !isStruct { + continue + } + for _, field := range list { + for _, name := range field.Names { + p.syms[t.Name+"."+name.Name] = true + } + } + } +} + // NewFromFiles computes documentation for a package. // // The package is specified by a list of *ast.Files and corresponding diff --git a/src/go/doc/example.go b/src/go/doc/example.go index ba1f863df0..8c01bf0a8d 100644 --- a/src/go/doc/example.go +++ b/src/go/doc/example.go @@ -74,6 +74,9 @@ func Examples(testFiles ...*ast.File) []*Example { if params := f.Type.Params; len(params.List) != 0 { continue // function has params; not a valid example } + if results := f.Type.Results; results != nil && len(results.List) != 0 { + continue // function has results; not a valid example + } if f.Body == nil { // ast.File.Body nil dereference (see issue 28044) continue } diff --git a/src/go/doc/example_test.go b/src/go/doc/example_test.go index 2fd54f8abb..db2b2d34cd 100644 --- a/src/go/doc/example_test.go +++ b/src/go/doc/example_test.go @@ -228,6 +228,8 @@ func ExampleFunc1_foo() {} func ExampleFunc1_foo_suffix() {} func ExampleFunc1_foo_Suffix() {} // matches Func1, instead of Func1_foo func Examplefunc1() {} // invalid - cannot match unexported +func ExampleFunc1_params(a int) {} // invalid - has parameter +func ExampleFunc1_results() int {} // invalid - has results func ExampleType1_Func1() {} func ExampleType1_Func1_() {} // invalid - suffix must start 
with a lower-case letter diff --git a/src/go/doc/testdata/pkgdoc/doc.go b/src/go/doc/testdata/pkgdoc/doc.go index d542dc2cdd..24e127c7fb 100644 --- a/src/go/doc/testdata/pkgdoc/doc.go +++ b/src/go/doc/testdata/pkgdoc/doc.go @@ -18,7 +18,7 @@ func (T) M() {} var _ = rand.Int var _ = crand.Reader -type G[T any] struct{ x T } +type G[T any] struct{ X T } func (g G[T]) M1() {} func (g *G[T]) M2() {} diff --git a/src/internal/coverage/decodemeta/decodefile.go b/src/internal/coverage/decodemeta/decodefile.go index 6f4dd1a3ec..474844bf97 100644 --- a/src/internal/coverage/decodemeta/decodefile.go +++ b/src/internal/coverage/decodemeta/decodefile.go @@ -75,7 +75,7 @@ func (r *CoverageMetaFileReader) readFileHeader() error { // Vet the version. If this is a meta-data file from the future, // we won't be able to read it. if r.hdr.Version > coverage.MetaFileVersion { - return fmt.Errorf("meta-data file withn unknown version %d (expected %d)", r.hdr.Version, coverage.MetaFileVersion) + return fmt.Errorf("meta-data file with an unknown version %d (expected %d)", r.hdr.Version, coverage.MetaFileVersion) } // Read package offsets for good measure diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index 4610ce807e..711fb045c3 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -219,7 +219,7 @@ func doinit() { if eax7 >= 1 { eax71, _, _, _ := cpuid(7, 1) if X86.HasAVX { - X86.HasAVXVNNI = isSet(4, eax71) + X86.HasAVXVNNI = isSet(eax71, cpuid_AVXVNNI) } } diff --git a/src/net/rpc/server.go b/src/net/rpc/server.go index 4233a426fe..961145c6f2 100644 --- a/src/net/rpc/server.go +++ b/src/net/rpc/server.go @@ -202,7 +202,7 @@ func NewServer() *Server { // DefaultServer is the default instance of [*Server]. var DefaultServer = NewServer() -// Is this type exported or a builtin? 
+// isExportedOrBuiltinType reports whether t is an exported or builtin type func isExportedOrBuiltinType(t reflect.Type) bool { for t.Kind() == reflect.Pointer { t = t.Elem() diff --git a/src/os/exec/exec.go b/src/os/exec/exec.go index e84ebfc453..aa7a6be7f0 100644 --- a/src/os/exec/exec.go +++ b/src/os/exec/exec.go @@ -102,6 +102,7 @@ import ( "runtime" "strconv" "strings" + "sync/atomic" "syscall" "time" ) @@ -354,6 +355,11 @@ type Cmd struct { // the work of resolving the extension, so Start doesn't need to do it again. // This is only used on Windows. cachedLookExtensions struct{ in, out string } + + // startCalled records that Start was attempted, regardless of outcome. + // (Until go.dev/issue/77075 is resolved, we use atomic.SwapInt32, + // not atomic.Bool.Swap, to avoid triggering the copylocks vet check.) + startCalled int32 } // A ctxResult reports the result of watching the Context associated with a @@ -635,7 +641,8 @@ func (c *Cmd) Run() error { func (c *Cmd) Start() error { // Check for doubled Start calls before we defer failure cleanup. If the prior // call to Start succeeded, we don't want to spuriously close its pipes. - if c.Process != nil { + // It is an error to call Start twice even if the first call did not create a process. + if atomic.SwapInt32(&c.startCalled, 1) != 0 { return errors.New("exec: already started") } @@ -647,6 +654,7 @@ func (c *Cmd) Start() error { if !started { closeDescriptors(c.parentIOPipes) c.parentIOPipes = nil + c.goroutine = nil // aid GC, finalization of pipe fds } }() diff --git a/src/os/exec/exec_test.go b/src/os/exec/exec_test.go index 1decebdc22..bf2f3da535 100644 --- a/src/os/exec/exec_test.go +++ b/src/os/exec/exec_test.go @@ -1839,3 +1839,29 @@ func TestAbsPathExec(t *testing.T) { } }) } + +// Calling Start twice is an error, regardless of outcome. 
+func TestStart_twice(t *testing.T) { + testenv.MustHaveExec(t) + + cmd := exec.Command("/bin/nonesuch") + for i, want := range []string{ + cond(runtime.GOOS == "windows", + `exec: "/bin/nonesuch": executable file not found in %PATH%`, + "fork/exec /bin/nonesuch: no such file or directory"), + "exec: already started", + } { + err := cmd.Start() + if got := fmt.Sprint(err); got != want { + t.Errorf("Start call #%d return err %q, want %q", i+1, got, want) + } + } +} + +func cond[T any](cond bool, t, f T) T { + if cond { + return t + } else { + return f + } +} diff --git a/src/reflect/value.go b/src/reflect/value.go index 7f0ec2a397..8c8acbaa9a 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -362,6 +362,7 @@ func (v Value) CanSet() bool { // type of the function's corresponding input parameter. // If v is a variadic function, Call creates the variadic slice parameter // itself, copying in the corresponding values. +// It panics if the Value was obtained by accessing unexported struct fields. func (v Value) Call(in []Value) []Value { v.mustBe(Func) v.mustBeExported() @@ -375,6 +376,7 @@ func (v Value) Call(in []Value) []Value { // It returns the output results as Values. // As in Go, each input argument must be assignable to the // type of the function's corresponding input parameter. +// It panics if the Value was obtained by accessing unexported struct fields. 
func (v Value) CallSlice(in []Value) []Value { v.mustBe(Func) v.mustBeExported() diff --git a/src/regexp/find_test.go b/src/regexp/find_test.go index 49e9619cef..5b446c29cb 100644 --- a/src/regexp/find_test.go +++ b/src/regexp/find_test.go @@ -159,23 +159,23 @@ func TestFind(t *testing.T) { for _, test := range findTests { re := MustCompile(test.pat) if re.String() != test.pat { - t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) + t.Errorf("re.String() = %q, want %q", re.String(), test.pat) } result := re.Find([]byte(test.text)) switch { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: - expect := test.text[test.matches[0][0]:test.matches[0][1]] + want := test.text[test.matches[0][0]:test.matches[0][1]] if len(result) != cap(result) { - t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test) + t.Errorf("got capacity %d, want %d: %s", cap(result), len(result), test) } - if expect != string(result) { - t.Errorf("expected %q got %q: %s", expect, result, test) + if want != string(result) { + t.Errorf("got %q, want %q: %s", result, want, test) } } } @@ -188,16 +188,16 @@ func TestFindString(t *testing.T) { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != "": - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == "": // Tricky because an empty result has two meanings: no match or empty match. 
if test.matches[0][0] != test.matches[0][1] { - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) } case test.matches != nil && result != "": - expect := test.text[test.matches[0][0]:test.matches[0][1]] - if expect != result { - t.Errorf("expected %q got %q: %s", expect, result, test) + want := test.text[test.matches[0][0]:test.matches[0][1]] + if want != result { + t.Errorf("got %q, want %q: %s", result, want, test) } } } @@ -208,13 +208,13 @@ func testFindIndex(test *FindTest, result []int, t *testing.T) { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %v, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: - expect := test.matches[0] - if expect[0] != result[0] || expect[1] != result[1] { - t.Errorf("expected %v got %v: %s", expect, result, test) + want := test.matches[0] + if want[0] != result[0] || want[1] != result[1] { + t.Errorf("got %v, want %v: %s", result, want, test) } } } @@ -246,22 +246,22 @@ func TestFindAll(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Fatalf("expected match; got none: %s", test) + t.Fatalf("got no match, want one: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) continue } for k, e := range test.matches { got := result[k] if len(got) != cap(got) { - t.Errorf("match %d: expected capacity %d got 
%d: %s", k, len(got), cap(got), test) + t.Errorf("match %d: got capacity %d, want %d: %s", k, cap(got), len(got), test) } - expect := test.text[e[0]:e[1]] - if expect != string(got) { - t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test) + want := test.text[e[0]:e[1]] + if want != string(got) { + t.Errorf("match %d: got %q, want %q: %s", k, got, want, test) } } } @@ -275,18 +275,18 @@ func TestFindAllString(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) continue } for k, e := range test.matches { - expect := test.text[e[0]:e[1]] - if expect != result[k] { - t.Errorf("expected %q got %q: %s", expect, result, test) + want := test.text[e[0]:e[1]] + if want != result[k] { + t.Errorf("got %q, want %q: %s", result[k], want, test) } } } @@ -298,17 +298,17 @@ func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %v, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) return } for k, e 
:= range test.matches { if e[0] != result[k][0] || e[1] != result[k][1] { - t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) + t.Errorf("match %d: got %v, want %v: %s", k, result[k], e, test) } } } @@ -330,24 +330,24 @@ func TestFindAllStringIndex(t *testing.T) { func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { if len(submatches) != len(result)*2 { - t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) + t.Errorf("match %d: got %d submatches, want %d: %s", n, len(result), len(submatches)/2, test) return } for k := 0; k < len(submatches); k += 2 { if submatches[k] == -1 { if result[k/2] != nil { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) + t.Errorf("match %d: got %q, want nil: %s", n, result, test) } continue } got := result[k/2] if len(got) != cap(got) { - t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test) + t.Errorf("match %d: got capacity %d, want %d: %s", n, cap(got), len(got), test) return } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != string(got) { - t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test) + want := test.text[submatches[k]:submatches[k+1]] + if want != string(got) { + t.Errorf("match %d: got %q, want %q: %s", n, got, want, test) return } } @@ -360,9 +360,9 @@ func TestFindSubmatch(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: testSubmatchBytes(&test, 0, test.matches[0], result, t) } @@ -371,19 +371,19 @@ func TestFindSubmatch(t *testing.T) { func testSubmatchString(test *FindTest, n int, submatches 
[]int, result []string, t *testing.T) { if len(submatches) != len(result)*2 { - t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) + t.Errorf("match %d: got %d submatches, want %d: %s", n, len(result), len(submatches)/2, test) return } for k := 0; k < len(submatches); k += 2 { if submatches[k] == -1 { if result[k/2] != "" { - t.Errorf("match %d: expected nil got %q: %s", n, result, test) + t.Errorf("match %d: got %q, want empty string: %s", n, result, test) } continue } - expect := test.text[submatches[k]:submatches[k+1]] - if expect != result[k/2] { - t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) + want := test.text[submatches[k]:submatches[k+1]] + if want != result[k/2] { + t.Errorf("match %d: got %q, want %q: %s", n, result[k/2], want, test) return } } @@ -396,23 +396,23 @@ func TestFindStringSubmatch(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: testSubmatchString(&test, 0, test.matches[0], result, t) } } } -func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { - if len(expect) != len(result) { - t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) +func testSubmatchIndices(test *FindTest, n int, want, result []int, t *testing.T) { + if len(want) != len(result) { + t.Errorf("match %d: got %d matches, want %d: %s", n, len(result)/2, len(want)/2, test) return } - for k, e := range expect { + for k, e := range want { if e != result[k] { - t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) + t.Errorf("match %d: submatch error: got %v, want %v: 
%s", n, result, want, test) } } } @@ -422,9 +422,9 @@ func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %v, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case test.matches != nil && result != nil: testSubmatchIndices(test, 0, test.matches[0], result, t) } @@ -457,11 +457,11 @@ func TestFindAllSubmatch(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) case test.matches != nil && result != nil: for k, match := range test.matches { testSubmatchBytes(&test, k, match, result[k], t) @@ -477,11 +477,11 @@ func TestFindAllStringSubmatch(t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %q, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) case test.matches != nil && result != nil: for k, match := range test.matches { 
testSubmatchString(&test, k, match, result[k], t) @@ -495,11 +495,11 @@ func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: - t.Errorf("expected no match; got one: %s", test) + t.Errorf("got match %v, want none: %s", result, test) case test.matches != nil && result == nil: - t.Errorf("expected match; got none: %s", test) + t.Errorf("got no match, want one: %s", test) case len(test.matches) != len(result): - t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + t.Errorf("got %d matches, want %d: %s", len(result), len(test.matches), test) case test.matches != nil && result != nil: for k, match := range test.matches { testSubmatchIndices(test, k, match, result[k], t) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index fd79356aba..c08bc7574b 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1213,7 +1213,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { if goexperiment.RuntimeSecret && gp.secret > 0 { // Mark any object allocated while in secret mode as secret. // This ensures we zero it immediately when freeing it. - addSecret(x) + addSecret(x, size) } // Notify sanitizers, if enabled. 
diff --git a/src/runtime/malloc_generated.go b/src/runtime/malloc_generated.go index cf329d2696..2be6a5b6f5 100644 --- a/src/runtime/malloc_generated.go +++ b/src/runtime/malloc_generated.go @@ -156,7 +156,7 @@ func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -321,7 +321,7 @@ func mallocgcSmallScanNoHeaderSC2(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -486,7 +486,7 @@ func mallocgcSmallScanNoHeaderSC3(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -651,7 +651,7 @@ func mallocgcSmallScanNoHeaderSC4(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -816,7 +816,7 @@ func mallocgcSmallScanNoHeaderSC5(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -981,7 +981,7 @@ func mallocgcSmallScanNoHeaderSC6(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1146,7 +1146,7 @@ func mallocgcSmallScanNoHeaderSC7(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1311,7 +1311,7 @@ func mallocgcSmallScanNoHeaderSC8(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1476,7 +1476,7 @@ func 
mallocgcSmallScanNoHeaderSC9(size uintptr, typ *_type, needzero bool) unsaf gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1641,7 +1641,7 @@ func mallocgcSmallScanNoHeaderSC10(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1806,7 +1806,7 @@ func mallocgcSmallScanNoHeaderSC11(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -1971,7 +1971,7 @@ func mallocgcSmallScanNoHeaderSC12(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2136,7 +2136,7 @@ func mallocgcSmallScanNoHeaderSC13(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2301,7 +2301,7 @@ func mallocgcSmallScanNoHeaderSC14(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2466,7 +2466,7 @@ func mallocgcSmallScanNoHeaderSC15(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2631,7 +2631,7 @@ func mallocgcSmallScanNoHeaderSC16(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2796,7 +2796,7 @@ func mallocgcSmallScanNoHeaderSC17(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -2961,7 
+2961,7 @@ func mallocgcSmallScanNoHeaderSC18(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3126,7 +3126,7 @@ func mallocgcSmallScanNoHeaderSC19(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3291,7 +3291,7 @@ func mallocgcSmallScanNoHeaderSC20(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3456,7 +3456,7 @@ func mallocgcSmallScanNoHeaderSC21(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3621,7 +3621,7 @@ func mallocgcSmallScanNoHeaderSC22(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3786,7 +3786,7 @@ func mallocgcSmallScanNoHeaderSC23(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -3951,7 +3951,7 @@ func mallocgcSmallScanNoHeaderSC24(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -4116,7 +4116,7 @@ func mallocgcSmallScanNoHeaderSC25(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -4281,7 +4281,7 @@ func mallocgcSmallScanNoHeaderSC26(size uintptr, typ *_type, needzero bool) unsa gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if 
valgrindenabled { @@ -6686,7 +6686,7 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -6757,7 +6757,7 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -6822,7 +6822,7 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -6893,7 +6893,7 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -6958,7 +6958,7 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7029,7 +7029,7 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7094,7 +7094,7 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7165,7 +7165,7 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7230,7 +7230,7 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + 
addSecret(x, size) } if valgrindenabled { @@ -7301,7 +7301,7 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7366,7 +7366,7 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7437,7 +7437,7 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7502,7 +7502,7 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7573,7 +7573,7 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7638,7 +7638,7 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7709,7 +7709,7 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7774,7 +7774,7 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7845,7 +7845,7 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { 
- addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7910,7 +7910,7 @@ func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -7981,7 +7981,7 @@ func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8046,7 +8046,7 @@ func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8117,7 +8117,7 @@ func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8182,7 +8182,7 @@ func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8253,7 +8253,7 @@ func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8318,7 +8318,7 @@ func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8389,7 +8389,7 @@ func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8454,7 +8454,7 @@ func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && 
gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8525,7 +8525,7 @@ func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8590,7 +8590,7 @@ func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8661,7 +8661,7 @@ func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8726,7 +8726,7 @@ func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8797,7 +8797,7 @@ func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8862,7 +8862,7 @@ func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8933,7 +8933,7 @@ func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -8998,7 +8998,7 @@ func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9069,7 +9069,7 @@ func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if 
goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9134,7 +9134,7 @@ func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9205,7 +9205,7 @@ func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9270,7 +9270,7 @@ func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9341,7 +9341,7 @@ func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9406,7 +9406,7 @@ func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9477,7 +9477,7 @@ func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9542,7 +9542,7 @@ func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9613,7 +9613,7 @@ func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9678,7 +9678,7 @@ func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Poi 
gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9749,7 +9749,7 @@ func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9814,7 +9814,7 @@ func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9885,7 +9885,7 @@ func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -9950,7 +9950,7 @@ func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { @@ -10021,7 +10021,7 @@ func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Poi gp := getg() if goexperiment.RuntimeSecret && gp.secret > 0 { - addSecret(x) + addSecret(x, size) } if valgrindenabled { diff --git a/src/runtime/malloc_stubs.go b/src/runtime/malloc_stubs.go index 8c424935bf..b395172e4b 100644 --- a/src/runtime/malloc_stubs.go +++ b/src/runtime/malloc_stubs.go @@ -101,7 +101,7 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer { if goexperiment.RuntimeSecret && gp.secret > 0 { // Mark any object allocated while in secret mode as secret. // This ensures we zero it immediately when freeing it. 
- addSecret(x) + addSecret(x, size) } } diff --git a/src/runtime/mcleanup_test.go b/src/runtime/mcleanup_test.go index 5afe85e103..dfc688a0f2 100644 --- a/src/runtime/mcleanup_test.go +++ b/src/runtime/mcleanup_test.go @@ -331,9 +331,14 @@ func TestCleanupLost(t *testing.T) { } wg.Wait() runtime.GC() - runtime.BlockUntilEmptyCleanupQueue(int64(10 * time.Second)) + timeout := 10 * time.Second + empty := runtime.BlockUntilEmptyCleanupQueue(int64(timeout)) + if !empty { + t.Errorf("failed to drain cleanup queue within %s", timeout) + } + if got := int(got.Load()); got != want { - t.Errorf("expected %d cleanups to be executed, got %d", got, want) + t.Errorf("%d cleanups executed, expected %d", got, want) } } diff --git a/src/runtime/metrics_cgo_test.go b/src/runtime/metrics_cgo_test.go index 6cc9d23195..ef1e3dd71d 100644 --- a/src/runtime/metrics_cgo_test.go +++ b/src/runtime/metrics_cgo_test.go @@ -12,7 +12,7 @@ import ( "testing" ) -func TestNotInGoMetricCallback(t *testing.T) { +func TestNotInGoMetric(t *testing.T) { switch runtime.GOOS { case "windows", "plan9": t.Skip("unsupported on Windows and Plan9") @@ -22,11 +22,22 @@ func TestNotInGoMetricCallback(t *testing.T) { } } - // This test is run in a subprocess to prevent other tests from polluting the metrics - // and because we need to make some cgo callbacks. - output := runTestProg(t, "testprogcgo", "NotInGoMetricCallback") - want := "OK\n" - if output != want { - t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want) + run := func(t *testing.T, name string) { + // This test is run in a subprocess to prevent other tests from polluting the metrics + // and because we need to make some cgo callbacks. 
+ output := runTestProg(t, "testprogcgo", name) + want := "OK\n" + if output != want { + t.Fatalf("output:\n%s\n\nwanted:\n%s", output, want) + } } + t.Run("CgoCall", func(t *testing.T) { + run(t, "NotInGoMetricCgoCall") + }) + t.Run("CgoCallback", func(t *testing.T) { + run(t, "NotInGoMetricCgoCallback") + }) + t.Run("CgoCallAndCallback", func(t *testing.T) { + run(t, "NotInGoMetricCgoCallAndCallback") + }) } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 61dc5457fc..68dfca4668 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -2745,6 +2745,14 @@ type specialPinCounter struct { counter uintptr } +// specialSecret tracks whether we need to zero an object immediately +// upon freeing. +type specialSecret struct { + _ sys.NotInHeap + special special + size uintptr +} + // specialsIter helps iterate over specials lists. type specialsIter struct { pprev **special @@ -2775,6 +2783,12 @@ func (i *specialsIter) unlinkAndNext() *special { // freeSpecial performs any cleanup on special s and deallocates it. // s must already be unlinked from the specials list. +// TODO(mknyszek): p and size together DO NOT represent a valid allocation. +// size is the size of the allocation block in the span (mspan.elemsize), and p is +// whatever pointer the special was attached to, which need not point to the +// beginning of the block, though it may. +// Consider passing the arguments differently to avoid giving the impression +// that p and size together represent an address range. 
func freeSpecial(s *special, p unsafe.Pointer, size uintptr) { switch s.kind { case _KindSpecialFinalizer: @@ -2828,7 +2842,19 @@ func freeSpecial(s *special, p unsafe.Pointer, size uintptr) { mheap_.specialBubbleAlloc.free(unsafe.Pointer(st)) unlock(&mheap_.speciallock) case _KindSpecialSecret: - memclrNoHeapPointers(p, size) + ss := (*specialSecret)(unsafe.Pointer(s)) + // p is the actual byte location that the special was + // attached to, but the size argument is the span + // element size. If we were to zero out using the size + // argument, we'd trounce over adjacent memory in cases + // where the allocation contains a header. Hence, we use + // the user-visible size which we stash in the special itself. + // + // p always points to the beginning of the user-visible + // allocation since the only way to attach a secret special + // is via the allocation path. This isn't universal for + // tiny allocs, but we avoid them in mallocgc anyway. + memclrNoHeapPointers(p, ss.size) lock(&mheap_.speciallock) mheap_.specialSecretAlloc.free(unsafe.Pointer(s)) unlock(&mheap_.speciallock) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 5ea96f03f5..005c875cbf 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2455,8 +2455,16 @@ func needm(signal bool) { // mp.curg is now a real goroutine. casgstatus(mp.curg, _Gdeadextra, _Gsyscall) sched.ngsys.Add(-1) - // N.B. We do not update nGsyscallNoP, because isExtraInC threads are not - // counted as real goroutines while they're in C. + + // This is technically inaccurate, but we set isExtraInC to false above, + // and so we need to update addGSyscallNoP to keep the two pieces of state + // consistent (it's only updated when isExtraInC is false). More specifically, + // When we get to cgocallbackg and exitsyscall, we'll be looking for a P, and + // since isExtraInC is false, we will decrement this metric. + // + // The inaccuracy is thankfully transient: only until this thread can get a P. 
+ // We're going into Go anyway, so it's okay to pretend we're a real goroutine now. + addGSyscallNoP(mp) if !signal { if trace.ok() { @@ -5027,7 +5035,7 @@ func exitsyscallTryGetP(oldp *p) *p { if oldp != nil { if thread, ok := setBlockOnExitSyscall(oldp); ok { thread.takeP() - addGSyscallNoP(thread.mp) // takeP does the opposite, but this is a net zero change. + decGSyscallNoP(getg().m) // We got a P for ourselves. thread.resume() return oldp } diff --git a/src/runtime/rt0_freebsd_arm64.s b/src/runtime/rt0_freebsd_arm64.s index a7a952664e..93562c5dd0 100644 --- a/src/runtime/rt0_freebsd_arm64.s +++ b/src/runtime/rt0_freebsd_arm64.s @@ -4,9 +4,12 @@ #include "textflag.h" -// On FreeBSD argc/argv are passed in R0, not RSP +// FreeBSD passes a pointer to the argument block in R0, not RSP, +// so _rt0_arm64 cannot be used. TEXT _rt0_arm64_freebsd(SB),NOSPLIT,$0 - JMP _rt0_arm64(SB) + ADD $8, R0, R1 // argv (use R0 while it's still the pointer) + MOVD 0(R0), R0 // argc + JMP runtime·rt0_go(SB) // When building with -buildmode=c-shared, this symbol is called when the shared // library is loaded. diff --git a/src/runtime/secret.go b/src/runtime/secret.go index 4c199d31d0..8aad63b54f 100644 --- a/src/runtime/secret.go +++ b/src/runtime/secret.go @@ -55,15 +55,9 @@ func secret_eraseSecrets() { // Don't put any code here: the stack frame's contents are gone! } -// specialSecret tracks whether we need to zero an object immediately -// upon freeing. -type specialSecret struct { - special special -} - // addSecret records the fact that we need to zero p immediately // when it is freed. -func addSecret(p unsafe.Pointer) { +func addSecret(p unsafe.Pointer, size uintptr) { // TODO(dmo): figure out the cost of these. 
These are mostly // intended to catch allocations that happen via the runtime // that the user has no control over and not big buffers that user @@ -72,6 +66,7 @@ func addSecret(p unsafe.Pointer) { lock(&mheap_.speciallock) s := (*specialSecret)(mheap_.specialSecretAlloc.alloc()) s.special.kind = _KindSpecialSecret + s.size = size unlock(&mheap_.speciallock) addspecial(p, &s.special, false) } diff --git a/src/runtime/secret/alloc_test.go b/src/runtime/secret/alloc_test.go new file mode 100644 index 0000000000..8f82dad4b5 --- /dev/null +++ b/src/runtime/secret/alloc_test.go @@ -0,0 +1,39 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.runtimesecret && (arm64 || amd64) && linux + +package secret_test + +import ( + "runtime" + "runtime/secret" + "testing" +) + +func TestInterleavedAllocFrees(t *testing.T) { + // Interleave heap objects that are kept alive beyond secret.Do + // with heap objects that do not live past secret.Do. + // The intent is for the clearing of one object (with the wrong size) + // to clobber the type header of the next slot. If the GC sees a nil type header + // when it expects to find one, it can throw. + type T struct { + p *int + x [1024]byte + } + for range 10 { + var s []*T + secret.Do(func() { + for i := range 100 { + t := &T{} + if i%2 == 0 { + s = append(s, t) + } + } + }) + runtime.GC() + runtime.GC() + runtime.KeepAlive(s) + } +} diff --git a/src/runtime/secret/doc.go b/src/runtime/secret/doc.go new file mode 100644 index 0000000000..c0dd4f95a6 --- /dev/null +++ b/src/runtime/secret/doc.go @@ -0,0 +1,15 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build goexperiment.runtimesecret + +// Package secret contains helper functions for zeroing out memory +// that is otherwise invisible to a user program in the service of +// forward secrecy. See https://en.wikipedia.org/wiki/Forward_secrecy for +// more information. +// +// This package (runtime/secret) is experimental, +// and not subject to the Go 1 compatibility promise. +// It only exists when building with the GOEXPERIMENT=runtimesecret environment variable set. +package secret diff --git a/src/runtime/secret/secret.go b/src/runtime/secret/secret.go index 9eae22605f..00a03b2d50 100644 --- a/src/runtime/secret/secret.go +++ b/src/runtime/secret/secret.go @@ -18,12 +18,23 @@ import ( // entire call tree initiated by f.) // - Any registers used by f are erased before Do returns. // - Any stack used by f is erased before Do returns. -// - Any heap allocation done by f is erased as soon as the garbage -// collector realizes that it is no longer reachable. +// - Heap allocations done by f are erased as soon as the garbage +// collector realizes that all allocated values are no longer reachable. // - Do works even if f panics or calls runtime.Goexit. As part of // that, any panic raised by f will appear as if it originates from // Do itself. // +// Users should be cautious of allocating inside Do. +// Erasing heap memory after Do returns may increase garbage collector sweep times and +// requires additional memory to keep track of allocations until they are to be erased. +// These costs can compound when an allocation is done in the service of growing a value, +// like appending to a slice or inserting into a map. In these cases, the entire new allocation is erased rather +// than just the secret parts of it. +// +// To reduce lifetimes of allocations and avoid unexpected performance issues, +// if a function invoked by Do needs to yield a result that shouldn't be erased, +// it should do so by copying the result into an allocation created by the caller. 
+// // Limitations: // - Currently only supported on linux/amd64 and linux/arm64. On unsupported // platforms, Do will invoke f directly. diff --git a/src/runtime/secret/secret_test.go b/src/runtime/secret/secret_test.go index 98d67cf8a4..e2f78c53a0 100644 --- a/src/runtime/secret/secret_test.go +++ b/src/runtime/secret/secret_test.go @@ -19,6 +19,7 @@ import ( "testing" "time" "unsafe" + "weak" ) type secretType int64 @@ -63,28 +64,33 @@ func heapSTiny() *secretType { // are freed. // See runtime/mheap.go:freeSpecial. func TestHeap(t *testing.T) { - var u uintptr + var addr uintptr + var p weak.Pointer[S] Do(func() { - u = uintptr(unsafe.Pointer(heapS())) + sp := heapS() + addr = uintptr(unsafe.Pointer(sp)) + p = weak.Make(sp) }) - - runtime.GC() + waitCollected(t, p) // Check that object got zeroed. - checkRangeForSecret(t, u, u+unsafe.Sizeof(S{})) + checkRangeForSecret(t, addr, addr+unsafe.Sizeof(S{})) // Also check our stack, just because we can. checkStackForSecret(t) } func TestHeapTiny(t *testing.T) { - var u uintptr + var addr uintptr + var p weak.Pointer[secretType] Do(func() { - u = uintptr(unsafe.Pointer(heapSTiny())) + sp := heapSTiny() + addr = uintptr(unsafe.Pointer(sp)) + p = weak.Make(sp) }) - runtime.GC() + waitCollected(t, p) // Check that object got zeroed. - checkRangeForSecret(t, u, u+unsafe.Sizeof(secretType(0))) + checkRangeForSecret(t, addr, addr+unsafe.Sizeof(secretType(0))) // Also check our stack, just because we can. 
checkStackForSecret(t) } @@ -240,6 +246,20 @@ func checkRangeForSecret(t *testing.T, lo, hi uintptr) { } } +func waitCollected[P any](t *testing.T, ptr weak.Pointer[P]) { + t.Helper() + i := 0 + for ptr.Value() != nil { + runtime.GC() + i++ + // 20 seems like a decent number of times to try + if i > 20 { + t.Errorf("value was never collected") + } + } + t.Logf("number of cycles until collection: %d", i) +} + func TestRegisters(t *testing.T) { Do(func() { s := makeS() diff --git a/src/runtime/secret_nosecret.go b/src/runtime/secret_nosecret.go index bf50fb5a54..0692d6bf70 100644 --- a/src/runtime/secret_nosecret.go +++ b/src/runtime/secret_nosecret.go @@ -22,9 +22,7 @@ func secret_dec() {} //go:linkname secret_eraseSecrets runtime/secret.eraseSecrets func secret_eraseSecrets() {} -func addSecret(p unsafe.Pointer) {} - -type specialSecret struct{} +func addSecret(p unsafe.Pointer, size uintptr) {} //go:linkname secret_getStack runtime/secret.getStack func secret_getStack() (uintptr, uintptr) { return 0, 0 } diff --git a/src/runtime/testdata/testprogcgo/notingo.go b/src/runtime/testdata/testprogcgo/notingo.go index 5af4c00e1f..a385ae24d6 100644 --- a/src/runtime/testdata/testprogcgo/notingo.go +++ b/src/runtime/testdata/testprogcgo/notingo.go @@ -12,6 +12,7 @@ package main #include <pthread.h> extern void Ready(); +extern void BlockForeverInGo(); static _Atomic int spinning; static _Atomic int released; @@ -40,6 +41,21 @@ static void Release() { atomic_store(&spinning, 0); atomic_store(&released, 1); } + +static void* enterGoThenWait(void* arg __attribute__ ((unused))) { + BlockForeverInGo(); + return NULL; +} + +static void WaitInGoInNewCThread() { + pthread_t tid; + pthread_create(&tid, NULL, enterGoThenWait, NULL); +} + +static void SpinForever() { + atomic_fetch_add(&spinning, 1); + while(1) {}; +} */ import "C" @@ -47,15 +63,62 @@ import ( "os" "runtime" "runtime/metrics" + "sync/atomic" ) func init() { - register("NotInGoMetricCallback", NotInGoMetricCallback) + 
register("NotInGoMetricCgoCall", NotInGoMetricCgoCall) + register("NotInGoMetricCgoCallback", NotInGoMetricCgoCallback) + register("NotInGoMetricCgoCallAndCallback", NotInGoMetricCgoCallAndCallback) } -func NotInGoMetricCallback() { +// NotInGoMetric just double-checks that N goroutines in cgo count as the metric reading N. +func NotInGoMetricCgoCall() { const N = 10 + + // Spin up the same number of goroutines that will all wait in a cgo call. + for range N { + go func() { + C.SpinForever() + }() + } + + // Make sure we're all blocked and spinning. + for C.Spinning() < N { + } + + // Read not-in-go before taking the Ps back. s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}} + failed := false + metrics.Read(s) + if n := s[0].Value.Uint64(); n != N { + println("pre-STW: expected", N, "not-in-go goroutines, found", n) + } + + // Do something that stops the world to take all the Ps back. + // + // This will force a re-accounting of some of the goroutines and + // re-checking not-in-go will help catch bugs. + runtime.ReadMemStats(&m) + + // Read not-in-go. + metrics.Read(s) + if n := s[0].Value.Uint64(); n != N { + println("post-STW: expected", N, "not-in-go goroutines, found", n) + } + + // Fail if we get a bad reading. + if failed { + os.Exit(2) + } + println("OK") +} + +// NotInGoMetricCgoCallback tests that threads that called into Go, then returned +// to C with *no* Go on the stack, are *not* counted as not-in-go in the +// runtime/metrics package. +func NotInGoMetricCgoCallback() { + const N = 10 // Create N new C threads that have called into Go at least once. for range N { @@ -90,6 +153,7 @@ func NotInGoMetricCallback() { } // Read not-in-go. 
+ s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}} metrics.Read(s) if n := s[0].Value.Uint64(); n != 0 { println("expected 0 not-in-go goroutines, found", n) @@ -105,3 +169,69 @@ var readyCh = make(chan bool) func Ready() { readyCh <- true } + +// NotInGoMetricCgoCallAndCallback tests that threads that called into Go are not +// keeping the count of not-in-go threads negative. Specifically, needm sets +// isExtraInC to false, breaking some of the invariants behind the not-in-go +// runtime/metrics metric, causing the underlying count to break if we don't +// account for this. In go.dev/cl/726964 this amounts to nGsyscallNoP being negative. +// Unfortunately the runtime/metrics package masks a negative nGsyscallNoP because +// it can transiently go negative due to a race. Therefore, this test checks +// the condition by making sure not-in-go is positive when we expect it to be. +// That is, threads in a cgo callback are *not* cancelling out threads in a +// regular cgo call. +func NotInGoMetricCgoCallAndCallback() { + const N = 10 + + // Spin up some threads that will do a cgo callback and just wait in Go. + // These threads are the ones we're worried about having the incorrect + // accounting that skews the count later. + for range N { + C.WaitInGoInNewCThread() + } + + // Spin up the same number of goroutines that will all wait in a cgo call. + for range N { + go func() { + C.SpinForever() + }() + } + + // Make sure we're all blocked and spinning. + for C.Spinning() < N || blockedForever.Load() < N { + } + + // Read not-in-go before taking the Ps back. + s := []metrics.Sample{{Name: "/sched/goroutines/not-in-go:goroutines"}} + failed := false + metrics.Read(s) + if n := s[0].Value.Uint64(); n != N { + println("pre-STW: expected", N, "not-in-go goroutines, found", n) + } + + // Do something that stops the world to take all the Ps back. 
+ // + // This will force a re-accounting of some of the goroutines and + // re-checking not-in-go will help catch bugs. + runtime.ReadMemStats(&m) + + // Read not-in-go. + metrics.Read(s) + if n := s[0].Value.Uint64(); n != N { + println("post-STW: expected", N, "not-in-go goroutines, found", n) + } + + // Fail if we get a bad reading. + if failed { + os.Exit(2) + } + println("OK") +} + +var blockedForever atomic.Uint32 + +//export BlockForeverInGo +func BlockForeverInGo() { + blockedForever.Add(1) + select {} +} diff --git a/src/runtime/trace.go b/src/runtime/trace.go index a7e8937a05..5f568d205e 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -12,7 +12,7 @@ // // ## Design // -// The basic idea behind the the execution tracer is to have per-M buffers that +// The basic idea behind the execution tracer is to have per-M buffers that // trace data may be written into. Each M maintains a write flag indicating whether // its trace buffer is currently in use. // @@ -173,7 +173,7 @@ // doesn't do this directly for performance reasons. The runtime implementation instead caches // a G on the M created for the C thread. On Linux this M is then cached in the thread's TLS, // and on other systems, the M is put on a global list on exit from Go. We need to do some -// extra work to make sure that this is modeled correctly in the the tracer. For example, +// extra work to make sure that this is modeled correctly in the tracer. For example, // a C thread exiting Go may leave a P hanging off of its M (whether that M is kept in TLS // or placed back on a list). 
In order to correctly model goroutine creation and destruction, // we must behave as if the P was at some point stolen by the runtime, if the C thread diff --git a/src/runtime/tracebuf.go b/src/runtime/tracebuf.go index 5adaede424..1caf69f8b8 100644 --- a/src/runtime/tracebuf.go +++ b/src/runtime/tracebuf.go @@ -29,7 +29,7 @@ type traceWriter struct { *traceBuf } -// writer returns an a traceWriter that writes into the current M's stream. +// writer returns a traceWriter that writes into the current M's stream. // // Once this is called, the caller must guard against stack growth until // end is called on it. Therefore, it's highly recommended to use this diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go index ca4f73c738..dd3a75eb44 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go @@ -30,6 +30,13 @@ func (x simdType) ElemBits() int { return x.Size / x.Lanes } +func (x simdType) Article() string { + if strings.HasPrefix(x.Name, "Int") { + return "an" + } + return "a" // Float, Uint +} + // LanesContainer returns the smallest int/uint bit size that is // large enough to hold one bit for each lane. E.g., Mask32x4 // is 4 lanes, and a uint8 is the smallest uint that has 4 bits. 
@@ -86,6 +93,33 @@ func (x simdType) MaskedStoreDoc() string { } } +func (x simdType) ToBitsDoc() string { + if x.Size == 512 || x.ElemBits() == 16 { + return fmt.Sprintf("// Asm: KMOV%s, CPU Features: AVX512", x.IntelSizeSuffix()) + } + // 128/256 bit vectors with 8, 32, 64 bit elements + var asm string + var feat string + switch x.ElemBits() { + case 8: + asm = "VPMOVMSKB" + if x.Size == 256 { + feat = "AVX2" + } else { + feat = "AVX" + } + case 32: + asm = "VMOVMSKPS" + feat = "AVX" + case 64: + asm = "VMOVMSKPD" + feat = "AVX" + default: + panic("unexpected ElemBits") + } + return fmt.Sprintf("// Asm: %s, CPU Features: %s", asm, feat) +} + func compareSimdTypes(x, y simdType) int { // "vreg" then "mask" if c := -compareNatural(x.Type, y.Type); c != 0 { @@ -135,7 +169,11 @@ type v{{.}} struct { {{end}} {{define "typeTmpl"}} -// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}} +{{- if eq .Type "mask"}} +// {{.Name}} is a mask for a SIMD vector of {{.Lanes}} {{.ElemBits}}-bit elements. +{{- else}} +// {{.Name}} is a {{.Size}}-bit SIMD vector of {{.Lanes}} {{.Base}}s. +{{- end}} type {{.Name}} struct { {{.Fields}} } @@ -171,15 +209,15 @@ func (X86Features) {{.Feature}}() bool { ` const simdLoadStoreTemplate = ` -// Len returns the number of elements in a {{.Name}} +// Len returns the number of elements in {{.Article}} {{.Name}}. func (x {{.Name}}) Len() int { return {{.Lanes}} } -// Load{{.Name}} loads a {{.Name}} from an array +// Load{{.Name}} loads {{.Article}} {{.Name}} from an array. // //go:noescape func Load{{.Name}}(y *[{{.Lanes}}]{{.Base}}) {{.Name}} -// Store stores a {{.Name}} to an array +// Store stores {{.Article}} {{.Name}} to an array. // //go:noescape func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}}) @@ -199,21 +237,21 @@ func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}} // Only the lower {{.Lanes}} bits of y are used. 
{{- end}} // -// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512 +{{.ToBitsDoc}} func (x {{.Name}}) ToBits() uint{{.LanesContainer}} ` const simdMaskedLoadStoreTemplate = ` -// LoadMasked{{.Name}} loads a {{.Name}} from an array, -// at those elements enabled by mask +// LoadMasked{{.Name}} loads {{.Article}} {{.Name}} from an array, +// at those elements enabled by mask. // {{.MaskedLoadDoc}} // //go:noescape func LoadMasked{{.Name}}(y *[{{.Lanes}}]{{.Base}}, mask Mask{{.ElemBits}}x{{.Lanes}}) {{.Name}} -// StoreMasked stores a {{.Name}} to an array, -// at those elements enabled by mask +// StoreMasked stores {{.Article}} {{.Name}} to an array, +// at those elements enabled by mask. // {{.MaskedStoreDoc}} // @@ -395,15 +433,15 @@ func ({{.Op1NameAndType "x"}}) {{.Go}}({{.ImmName}} uint8, {{.Op2NameAndType "y" {{end}} {{define "vectorConversion"}} -// {{.Tdst.Name}} converts from {{.Tsrc.Name}} to {{.Tdst.Name}} -func (from {{.Tsrc.Name}}) As{{.Tdst.Name}}() (to {{.Tdst.Name}}) +// As{{.Tdst.Name}} returns {{.Tdst.Article}} {{.Tdst.Name}} with the same bit representation as x. +func (x {{.Tsrc.Name}}) As{{.Tdst.Name}}() {{.Tdst.Name}} {{end}} {{define "mask"}} -// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}} +// To{{.VectorCounterpart}} converts from {{.Name}} to {{.VectorCounterpart}}. func (from {{.Name}}) To{{.VectorCounterpart}}() (to {{.VectorCounterpart}}) -// asMask converts from {{.VectorCounterpart}} to {{.Name}} +// asMask converts from {{.VectorCounterpart}} to {{.Name}}. 
func (from {{.VectorCounterpart}}) asMask() (to {{.Name}}) func (x {{.Name}}) And(y {{.Name}}) {{.Name}} diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go index 90c3fb620e..7a8823483a 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdrules.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdrules.go @@ -275,7 +275,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { origArgs = after } immArg = "[c] " - immArgCombineOff = " [makeValAndOff(int32(int8(c)),off)] " + immArgCombineOff = " [makeValAndOff(int32(uint8(c)),off)] " } memOpData.ArgsLoadAddr = immArg + origArgs + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits) // Remove the last vreg from the arg and change it to "ptr". diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go index c9d8693aa1..876ffabe3d 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go @@ -13,9 +13,7 @@ import ( ) var ( - ssaTemplates = template.Must(template.New("simdSSA").Parse(` -{{define "header"}}// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. - + ssaTemplates = template.Must(template.New("simdSSA").Parse(`{{define "header"}}` + generatedHeader + ` package amd64 import ( diff --git a/src/simd/archsimd/_gen/simdgen/godefs.go b/src/simd/archsimd/_gen/simdgen/godefs.go index 2c10377420..e956c1cd1d 100644 --- a/src/simd/archsimd/_gen/simdgen/godefs.go +++ b/src/simd/archsimd/_gen/simdgen/godefs.go @@ -135,6 +135,19 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { o.In = append(o.rawOperation.In, o.rawOperation.InVariant...) + // For down conversions, the high elements are zeroed if the result has more elements. + // TODO: we should encode this logic in the YAML file, instead of hardcoding it here. 
+ if len(o.In) > 0 && len(o.Out) > 0 { + inLanes := o.In[0].Lanes + outLanes := o.Out[0].Lanes + if inLanes != nil && outLanes != nil && *inLanes < *outLanes { + if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) && + !strings.Contains(o.Go, "Concat") { + o.Documentation += "\n// Results are packed to low elements in the returned vector, its upper elements are zeroed." + } + } + } + return nil } @@ -362,7 +375,7 @@ func compareNatural(s1, s2 string) int { return strings.Compare(s1, s2) } -const generatedHeader = `// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +const generatedHeader = `// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. ` func writeGoDefs(path string, cl unify.Closure) error { diff --git a/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml index 35e8104218..ac5bd825db 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/AddSub/categories.yaml @@ -17,21 +17,83 @@ // NAME subtracts corresponding elements of two vectors with saturation. - go: AddPairs commutative: false + out: + - elemBits: 16|32 documentation: !string |- // NAME horizontally adds adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. +- go: AddPairs + commutative: false + out: + - elemBits: 64 + documentation: !string |- + // NAME horizontally adds adjacent pairs of elements. + // For x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1]. 
- go: SubPairs commutative: false + out: + - elemBits: 16|32 documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. +- go: SubPairs + commutative: false + out: + - elemBits: 64 + documentation: !string |- + // NAME horizontally subtracts adjacent pairs of elements. + // For x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1]. - go: AddPairsSaturated commutative: false documentation: !string |- // NAME horizontally adds adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. - go: SubPairsSaturated commutative: false documentation: !string |- // NAME horizontally subtracts adjacent pairs of elements with saturation. - // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. + // For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. +- go: AddPairsGrouped + commutative: false + out: + - elemBits: 16|32 + documentation: !string |- + // NAME horizontally adds adjacent pairs of elements. + // With each 128-bit as a group: + // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. +- go: AddPairsGrouped + commutative: false + out: + - elemBits: 64 + documentation: !string |- + // NAME horizontally adds adjacent pairs of elements. + // With each 128-bit as a group: + // for x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1]. 
+- go: SubPairsGrouped + commutative: false + out: + - elemBits: 16|32 + documentation: !string |- + // NAME horizontally subtracts adjacent pairs of elements. + // With each 128-bit as a group: + // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. +- go: SubPairsGrouped + commutative: false + out: + - elemBits: 64 + documentation: !string |- + // NAME horizontally subtracts adjacent pairs of elements. + // With each 128-bit as a group: + // for x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1]. +- go: AddPairsSaturatedGrouped + commutative: false + documentation: !string |- + // NAME horizontally adds adjacent pairs of elements with saturation. + // With each 128-bit as a group: + // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. +- go: SubPairsSaturatedGrouped + commutative: false + documentation: !string |- + // NAME horizontally subtracts adjacent pairs of elements with saturation. + // With each 128-bit as a group: + // for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. 
diff --git a/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml index 4423d8c7c6..17cee597d9 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/AddSub/go.yaml @@ -53,25 +53,71 @@ - *uint - go: AddPairs asm: "VPHADD[DW]" - in: *2any - out: *1any + in: &2any128 + - &any128 + go: $t + bits: 128 + - *any128 + out: &1any128 + - *any128 - go: SubPairs asm: "VPHSUB[DW]" - in: *2any - out: *1any + in: *2any128 + out: *1any128 - go: AddPairs asm: "VHADDP[SD]" # floats - in: *2any - out: *1any + in: *2any128 + out: *1any128 - go: SubPairs asm: "VHSUBP[SD]" # floats - in: *2any - out: *1any + in: *2any128 + out: *1any128 - go: AddPairsSaturated asm: "VPHADDS[DW]" - in: *2int - out: *1int + in: &2int128 + - &int128 + go: $t + base: int + bits: 128 + - *int128 + out: &1int128 + - *int128 - go: SubPairsSaturated asm: "VPHSUBS[DW]" - in: *2int - out: *1int + in: *2int128 + out: *1int128 +- go: AddPairsGrouped + asm: "VPHADD[DW]" + in: &2any256 + - &any256 + go: $t + bits: 256 + - *any256 + out: &1any256 + - *any256 +- go: SubPairsGrouped + asm: "VPHSUB[DW]" + in: *2any256 + out: *1any256 +- go: AddPairsGrouped + asm: "VHADDP[SD]" # floats + in: *2any256 + out: *1any256 +- go: SubPairsGrouped + asm: "VHSUBP[SD]" # floats + in: *2any256 + out: *1any256 +- go: AddPairsSaturatedGrouped + asm: "VPHADDS[DW]" + in: &2int256 + - &int256 + go: $t + base: int + bits: 256 + - *int256 + out: &1int256 + - *int256 +- go: SubPairsSaturatedGrouped + asm: "VPHSUBS[DW]" + in: *2int256 + out: *1int256 diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml index 4b639d7a34..97ee587503 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml @@ -10,34 +10,29 @@ constImm: 0 commutative: true documentation: !string |- - // NAME returns x 
equals y, elementwise. + // NAME returns a mask whose elements indicate whether x == y. - go: Less constImm: 1 commutative: false documentation: !string |- - // NAME returns x less-than y, elementwise. + // NAME returns a mask whose elements indicate whether x < y. - go: LessEqual constImm: 2 commutative: false documentation: !string |- - // NAME returns x less-than-or-equals y, elementwise. -- go: IsNan # For float only. - constImm: 3 - commutative: true - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). + // NAME returns a mask whose elements indicate whether x <= y. - go: NotEqual constImm: 4 commutative: true documentation: !string |- - // NAME returns x not-equals y, elementwise. + // NAME returns a mask whose elements indicate whether x != y. - go: GreaterEqual constImm: 13 commutative: false documentation: !string |- - // NAME returns x greater-than-or-equals y, elementwise. + // NAME returns a mask whose elements indicate whether x >= y. - go: Greater constImm: 14 commutative: false documentation: !string |- - // NAME returns x greater-than y, elementwise. + // NAME returns a mask whose elements indicate whether x > y. 
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml index 3f6c8a45b6..6dbfb57343 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml @@ -121,7 +121,7 @@ - class: mask # Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) regexpTag: "compares" asm: "VCMPP[SD]" in: @@ -135,7 +135,7 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) regexpTag: "compares" asm: "VCMPP[SD]" in: diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml index dd33284063..698e6d9956 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml @@ -44,124 +44,174 @@ // NAME converts element values to float64. # Int <-> Int conversions -- go: "(Extend|Saturate|Truncate)?ToInt8" +- go: "TruncateToInt8" commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to int8. -- go: "(Extend|Saturate|Truncate)?ToInt16(Concat)?" + // NAME truncates element values to int8. +- go: "SaturateToInt8" commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to int16. -- go: "(Extend|Saturate|Truncate)?ToInt32" + // NAME converts element values to int8 with signed saturation. +- go: "ExtendToInt16(Concat)?" commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to int32. -- go: "(Extend|Saturate|Truncate)?ToInt64" + // NAME sign-extends element values to int16. +- go: "TruncateToInt16(Concat)?" 
commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to int64. -- go: "(Extend|Saturate|Truncate)?ToUint8" + // NAME truncates element values to int16. +- go: "SaturateToInt16(Concat(Grouped)?)?" commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to uint8. -- go: "(Extend|Saturate|Truncate)?ToUint16(Concat)?" + // NAME converts element values to int16 with signed saturation. +- go: "ExtendToInt32" commutative: false regexpTag: "convert" documentation: !string |- - // NAME converts element values to uint16. -- go: "(Extend|Saturate|Truncate)?ToUint32" + // NAME sign-extends element values to int32. +- go: "TruncateToInt32" + commutative: false regexpTag: "convert" + documentation: !string |- + // NAME truncates element values to int32. +- go: "SaturateToInt32" commutative: false + regexpTag: "convert" documentation: !string |- - // NAME converts element values to uint32. -- go: "(Extend|Saturate|Truncate)?ToUint64" + // NAME converts element values to int32 with signed saturation. +- go: "ExtendToInt64" + commutative: false regexpTag: "convert" + documentation: !string |- + // NAME sign-extends element values to int64. +- go: "TruncateToUint8" commutative: false + regexpTag: "convert" documentation: !string |- - // NAME converts element values to uint64. + // NAME truncates element values to uint8. +- go: "SaturateToUint8" + commutative: false + regexpTag: "convert" + documentation: !string |- + // NAME converts element values to uint8 with unsigned saturation. +- go: "ExtendToUint16(Concat)?" + commutative: false + regexpTag: "convert" + documentation: !string |- + // NAME zero-extends element values to uint16. +- go: "TruncateToUint16(Concat)?" + commutative: false + regexpTag: "convert" + documentation: !string |- + // NAME truncates element values to uint16. +- go: "SaturateToUint16(Concat(Grouped)?)?" 
+ commutative: false + regexpTag: "convert" + documentation: !string |- + // NAME converts element values to uint16 with unsigned saturation. +- go: "ExtendToUint32" + regexpTag: "convert" + commutative: false + documentation: !string |- + // NAME zero-extends element values to uint32. +- go: "TruncateToUint32" + regexpTag: "convert" + commutative: false + documentation: !string |- + // NAME truncates element values to uint32. +- go: "SaturateToUint32" + regexpTag: "convert" + commutative: false + documentation: !string |- + // NAME converts element values to uint32 with unsigned saturation. +- go: "ExtendToUint64" + regexpTag: "convert" + commutative: false + documentation: !string |- + // NAME zero-extends element values to uint64. # low-part only Int <-> Int conversions -- go: ExtendLo8ToUint16x8 +- go: ExtendLo8ToUint16 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to uint16. -- go: ExtendLo8ToInt16x8 + // NAME zero-extends 8 lowest vector element values to uint16. +- go: ExtendLo8ToInt16 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to int16. -- go: ExtendLo4ToUint32x4 + // NAME sign-extends 8 lowest vector element values to int16. +- go: ExtendLo4ToUint32 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to uint32. -- go: ExtendLo4ToInt32x4 + // NAME zero-extends 4 lowest vector element values to uint32. +- go: ExtendLo4ToInt32 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to int32. -- go: ExtendLo2ToUint64x2 + // NAME sign-extends 4 lowest vector element values to int32. +- go: ExtendLo2ToUint64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 + // NAME zero-extends 2 lowest vector element values to uint64. 
+- go: ExtendLo2ToInt64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to int64. -- go: ExtendLo2ToUint64x2 + // NAME sign-extends 2 lowest vector element values to int64. +- go: ExtendLo2ToUint64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to uint64. -- go: ExtendLo4ToUint64x4 + // NAME zero-extends 2 lowest vector element values to uint64. +- go: ExtendLo4ToUint64 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 + // NAME zero-extends 4 lowest vector element values to uint64. +- go: ExtendLo2ToInt64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to int64. -- go: ExtendLo4ToInt64x4 + // NAME sign-extends 2 lowest vector element values to int64. +- go: ExtendLo4ToInt64 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to int64. -- go: ExtendLo4ToUint32x4 + // NAME sign-extends 4 lowest vector element values to int64. +- go: ExtendLo4ToUint32 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to uint32. -- go: ExtendLo8ToUint32x8 + // NAME zero-extends 4 lowest vector element values to uint32. +- go: ExtendLo8ToUint32 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to uint32. -- go: ExtendLo4ToInt32x4 + // NAME zero-extends 8 lowest vector element values to uint32. +- go: ExtendLo4ToInt32 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to int32. -- go: ExtendLo8ToInt32x8 + // NAME sign-extends 4 lowest vector element values to int32. +- go: ExtendLo8ToInt32 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to int32. -- go: ExtendLo2ToUint64x2 + // NAME sign-extends 8 lowest vector element values to int32. 
+- go: ExtendLo2ToUint64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to uint64. -- go: ExtendLo4ToUint64x4 + // NAME zero-extends 2 lowest vector element values to uint64. +- go: ExtendLo4ToUint64 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to uint64. -- go: ExtendLo8ToUint64x8 + // NAME zero-extends 4 lowest vector element values to uint64. +- go: ExtendLo8ToUint64 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 + // NAME zero-extends 8 lowest vector element values to uint64. +- go: ExtendLo2ToInt64 commutative: false documentation: !string |- - // NAME converts 2 lowest vector element values to int64. -- go: ExtendLo4ToInt64x4 + // NAME sign-extends 2 lowest vector element values to int64. +- go: ExtendLo4ToInt64 commutative: false documentation: !string |- - // NAME converts 4 lowest vector element values to int64. -- go: ExtendLo8ToInt64x8 + // NAME sign-extends 4 lowest vector element values to int64. +- go: ExtendLo8ToInt64 commutative: false documentation: !string |- - // NAME converts 8 lowest vector element values to int64.
\ No newline at end of file + // NAME sign-extends 8 lowest vector element values to int64. diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml index af058124fb..2f19d12616 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml @@ -138,9 +138,6 @@ # Widening integer conversions. # uint8 -> uint16 - go: ExtendToUint16 - addDoc: &zeroExtendDoc - !string |- - // The result vector's elements are zero-extended. regexpTag: "convert" asm: "VPMOVZXBW" in: @@ -156,7 +153,6 @@ - go: ExtendToUint16 regexpTag: "convert" asm: "VPMOVZXBW" - addDoc: *zeroExtendDoc in: - &u8x32 base: uint @@ -171,9 +167,6 @@ - go: ExtendToInt16 regexpTag: "convert" asm: "VPMOVSXBW" - addDoc: &signExtendDoc - !string |- - // The result vector's elements are sign-extended. in: - &i8x16 base: int @@ -187,7 +180,6 @@ - go: ExtendToInt16 regexpTag: "convert" asm: "VPMOVSXBW" - addDoc: *signExtendDoc in: - &i8x32 base: int @@ -202,7 +194,6 @@ - go: ExtendToUint32 regexpTag: "convert" asm: "VPMOVZXWD" - addDoc: *zeroExtendDoc in: - &u16x8 base: uint @@ -216,7 +207,6 @@ - go: ExtendToUint32 regexpTag: "convert" asm: "VPMOVZXWD" - addDoc: *zeroExtendDoc in: - *u16x16 out: @@ -228,7 +218,6 @@ - go: ExtendToInt32 regexpTag: "convert" asm: "VPMOVSXWD" - addDoc: *signExtendDoc in: - &i16x8 base: int @@ -242,7 +231,6 @@ - go: ExtendToInt32 regexpTag: "convert" asm: "VPMOVSXWD" - addDoc: *signExtendDoc in: - *i16x16 out: @@ -254,7 +242,6 @@ - go: ExtendToUint64 regexpTag: "convert" asm: "VPMOVZXDQ" - addDoc: *zeroExtendDoc in: - &u32x4 base: uint @@ -268,7 +255,6 @@ - go: ExtendToUint64 regexpTag: "convert" asm: "VPMOVZXDQ" - addDoc: *zeroExtendDoc in: - *u32x8 out: @@ -280,7 +266,6 @@ - go: ExtendToInt64 regexpTag: "convert" asm: "VPMOVSXDQ" - addDoc: *signExtendDoc in: - &i32x4 base: int @@ -294,7 +279,6 @@ - go: ExtendToInt64 regexpTag: "convert" asm: "VPMOVSXDQ" - addDoc: 
*signExtendDoc in: - *i32x8 out: @@ -306,7 +290,6 @@ - go: ExtendToUint64 regexpTag: "convert" asm: "VPMOVZXWQ" - addDoc: *zeroExtendDoc in: - *u16x8 out: @@ -315,7 +298,6 @@ - go: ExtendToInt64 regexpTag: "convert" asm: "VPMOVSXWQ" - addDoc: *signExtendDoc in: - *i16x8 out: @@ -324,7 +306,6 @@ - go: ExtendToUint32 regexpTag: "convert" asm: "VPMOVZXBD" - addDoc: *zeroExtendDoc in: - *u8x16 out: @@ -333,7 +314,6 @@ - go: ExtendToInt32 regexpTag: "convert" asm: "VPMOVSXBD" - addDoc: *signExtendDoc in: - *i8x16 out: @@ -342,10 +322,6 @@ - go: TruncateToInt8 regexpTag: "convert" asm: "VPMOV[WDQ]B" - addDoc: &truncDocZeroUpper - !string |- - // Conversion is done with truncation on the vector elements. - // Results are packed to low elements in the returned vector, its upper elements are zero-cleared. in: - base: int out: @@ -354,7 +330,6 @@ - go: TruncateToUint8 regexpTag: "convert" asm: "VPMOV[WDQ]B" - addDoc: *truncDocZeroUpper in: - base: uint out: @@ -363,9 +338,6 @@ - go: TruncateToInt8 regexpTag: "convert" asm: "VPMOV[WDQ]B" - addDoc: &truncDoc - !string |- - // Conversion is done with truncation on the vector elements. in: - base: int out: @@ -374,7 +346,6 @@ - go: TruncateToUint8 regexpTag: "convert" asm: "VPMOV[WDQ]B" - addDoc: *truncDoc in: - base: uint out: @@ -383,7 +354,6 @@ - go: TruncateToInt16 regexpTag: "convert" asm: "VPMOV[DQ]W" - addDoc: *truncDoc in: - base: int out: @@ -391,7 +361,6 @@ - go: TruncateToUint16 regexpTag: "convert" asm: "VPMOV[DQ]W" - addDoc: *truncDoc in: - base: uint out: @@ -399,7 +368,6 @@ - go: TruncateToInt32 regexpTag: "convert" asm: "VPMOVQD" - addDoc: *truncDoc in: - base: int out: @@ -407,7 +375,6 @@ - go: TruncateToUint32 regexpTag: "convert" asm: "VPMOVQD" - addDoc: *truncDoc in: - base: uint out: @@ -416,10 +383,6 @@ - go: SaturateToInt8 regexpTag: "convert" asm: "VPMOVS[WDQ]B" - addDoc: &satDocZeroUpper - !string |- - // Conversion is done with saturation on the vector elements. 
- // Results are packed to low elements in the returned vector, its upper elements are zero-cleared. in: - base: int out: @@ -427,19 +390,15 @@ bits: 128 - go: SaturateToUint8 regexpTag: "convert" - asm: "VPMOVS[WDQ]B" - addDoc: *satDocZeroUpper + asm: "VPMOVUS[WDQ]B" in: - - base: int + - base: uint out: - - base: int + - base: uint bits: 128 - go: SaturateToInt8 regexpTag: "convert" asm: "VPMOVS[WDQ]B" - addDoc: &satDoc - !string |- - // Conversion is done with saturation on the vector elements. in: - base: int out: @@ -448,7 +407,6 @@ - go: SaturateToUint8 regexpTag: "convert" asm: "VPMOVUS[WDQ]B" - addDoc: *satDoc in: - base: uint out: @@ -457,7 +415,6 @@ - go: SaturateToInt16 regexpTag: "convert" asm: "VPMOVS[DQ]W" - addDoc: *satDoc in: - base: int out: @@ -465,7 +422,6 @@ - go: SaturateToUint16 regexpTag: "convert" asm: "VPMOVUS[DQ]W" - addDoc: *satDoc in: - base: uint out: @@ -473,7 +429,6 @@ - go: SaturateToInt32 regexpTag: "convert" asm: "VPMOVSQD" - addDoc: *satDoc in: - base: int out: @@ -481,7 +436,6 @@ - go: SaturateToUint32 regexpTag: "convert" asm: "VPMOVUSQD" - addDoc: *satDoc in: - base: uint out: @@ -492,67 +446,86 @@ asm: "VPACKSSDW" addDoc: &satDocConcat !string |- + // The converted elements from x will be packed to the lower part of the result vector, + // the converted elements from y will be packed to the upper part of the result vector. + in: + - base: int + - base: int + out: + - base: int + bits: 128 +- go: SaturateToInt16ConcatGrouped + regexpTag: "convert" + asm: "VPACKSSDW" + addDoc: &satDocConcatGrouped + !string |- // With each 128-bit as a group: - // The converted group from the first input vector will be packed to the lower part of the result vector, - // the converted group from the second input vector will be packed to the upper part of the result vector. - // Conversion is done with saturation on the vector elements. 
+ // The converted elements from x will be packed to the lower part of the group in the result vector, + // the converted elements from y will be packed to the upper part of the group in the result vector. in: - base: int - base: int out: - base: int + bits: 256|512 - go: SaturateToUint16Concat regexpTag: "convert" asm: "VPACKUSDW" addDoc: *satDocConcat in: + - base: int + - base: int + out: - base: uint - - base: uint + bits: 128 +- go: SaturateToUint16ConcatGrouped + regexpTag: "convert" + asm: "VPACKUSDW" + addDoc: *satDocConcatGrouped + in: + - base: int + - base: int out: - base: uint + bits: 256|512 # low-part only conversions. # uint8->uint16 -- go: ExtendLo8ToUint16x8 +- go: ExtendLo8ToUint16 regexpTag: "convert" asm: "VPMOVZXBW" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u16x8 # int8->int16 -- go: ExtendLo8ToInt16x8 +- go: ExtendLo8ToInt16 regexpTag: "convert" asm: "VPMOVSXBW" - addDoc: *signExtendDoc in: - *i8x16 out: - *i16x8 # uint16->uint32 -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 regexpTag: "convert" asm: "VPMOVZXWD" - addDoc: *zeroExtendDoc in: - *u16x8 out: - *u32x4 # int16->int32 -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 regexpTag: "convert" asm: "VPMOVSXWD" - addDoc: *signExtendDoc in: - *i16x8 out: - *i32x4 # uint32 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXDQ" - addDoc: *zeroExtendDoc in: - *u32x4 out: @@ -561,10 +534,9 @@ elemBits: 64 bits: 128 # int32 -> int64 -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXDQ" - addDoc: *signExtendDoc in: - *i32x4 out: @@ -573,120 +545,106 @@ elemBits: 64 bits: 128 # uint16 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXWQ" - addDoc: *zeroExtendDoc in: - *u16x8 out: - *u64x2 -- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 regexpTag: "convert" asm: "VPMOVZXWQ" - addDoc: *zeroExtendDoc in: - *u16x8 out: - *u64x4 # int16 -> int64 -- go: 
ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXWQ" - addDoc: *signExtendDoc in: - *i16x8 out: - *i64x2 -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 regexpTag: "convert" asm: "VPMOVSXWQ" - addDoc: *signExtendDoc in: - *i16x8 out: - *i64x4 # uint8 -> uint32 -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 regexpTag: "convert" asm: "VPMOVZXBD" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u32x4 -- go: ExtendLo8ToUint32x8 +- go: ExtendLo8ToUint32 regexpTag: "convert" asm: "VPMOVZXBD" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u32x8 # int8 -> int32 -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 regexpTag: "convert" asm: "VPMOVSXBD" - addDoc: *signExtendDoc in: - *i8x16 out: - *i32x4 -- go: ExtendLo8ToInt32x8 +- go: ExtendLo8ToInt32 regexpTag: "convert" asm: "VPMOVSXBD" - addDoc: *signExtendDoc in: - *i8x16 out: - *i32x8 # uint8 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x2 -- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x4 -- go: ExtendLo8ToUint64x8 +- go: ExtendLo8ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" - addDoc: *zeroExtendDoc in: - *u8x16 out: - *u64x8 # int8 -> int64 -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" - addDoc: *signExtendDoc in: - *i8x16 out: - *i64x2 -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" - addDoc: *signExtendDoc in: - *i8x16 out: - *i64x4 -- go: ExtendLo8ToInt64x8 +- go: ExtendLo8ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" - addDoc: *signExtendDoc in: - *i8x16 out: - - *i64x8
\ No newline at end of file + - *i64x8 diff --git a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml index f2d8af6886..90f5208ff7 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/FPonlyArith/categories.yaml @@ -18,12 +18,13 @@ - go: Scale commutative: false documentation: !string |- - // NAME multiplies elements by a power of 2. + // NAME multiplies each element of x by 2 raised to the power of the + // floor of the corresponding element in y. - go: RoundToEven commutative: false constImm: 0 documentation: !string |- - // NAME rounds elements to the nearest integer. + // NAME rounds elements to the nearest integer, rounding ties to even. - go: RoundToEvenScaled commutative: false constImm: 0 diff --git a/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml index bf33642a11..ae6554d731 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/IntOnlyArith/categories.yaml @@ -12,8 +12,8 @@ # Applies sign of second operand to first: sign(val, sign_src) commutative: false documentation: !string |- - // NAME returns the product of the first operand with -1, 0, or 1, - // whichever constant is nearest to the value of the second operand. + // NAME returns the product of x with -1, 0, or 1, + // whichever constant is nearest to the value of y. 
# Sign does not have masked version - go: OnesCount commutative: false diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml index 2b1da7adaf..54a8ece574 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/categories.yaml @@ -10,21 +10,10 @@ documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. # - go: DotProductBroadcast # commutative: true # # documentation: !string |- # // NAME multiplies all elements and broadcasts the sum. -- go: DotProductQuadruple - commutative: false - documentation: !string |- - // NAME performs dot products on groups of 4 elements of x and y. - // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction. -- go: DotProductQuadrupleSaturated - commutative: false - documentation: !string |- - // NAME multiplies performs dot products on groups of 4 elements of x and y. - // NAME(x, y).Add(z) will be optimized to the full form of the underlying instruction. 
- go: AddDotProductPairs commutative: false noTypes: "true" diff --git a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml index 4a1195b52d..18ce8a53b2 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/MLOps/go.yaml @@ -33,33 +33,6 @@ # const: 127 # out: # - *dpb_src -- go: DotProductQuadruple - asm: "VPDPBUSD" - operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0 - in: - - &qdpa_acc - go: $t_acc - base: int - elemBits: 32 - - &qdpa_src1 - go: $t_src1 - base: uint - overwriteElementBits: 8 - - &qdpa_src2 - go: $t_src2 - base: int - overwriteElementBits: 8 - out: - - *qdpa_acc -- go: DotProductQuadrupleSaturated - asm: "VPDPBUSDS" - operandOrder: "31Zero3" # switch operand 3 and 1, and make 3 always 0 - in: - - *qdpa_acc - - *qdpa_src1 - - *qdpa_src2 - out: - - *qdpa_acc - go: AddDotProductPairs asm: "VPDPWSSD" in: diff --git a/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml index a7e30f4693..1d79d85a46 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/MinMax/categories.yaml @@ -2,8 +2,8 @@ - go: Max commutative: true documentation: !string |- - // NAME computes the maximum of corresponding elements. + // NAME computes the maximum of each pair of corresponding elements in x and y. - go: Min commutative: true documentation: !string |- - // NAME computes the minimum of corresponding elements. + // NAME computes the minimum of each pair of corresponding elements in x and y. 
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml index 3c86974e8a..38bc9374cc 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml @@ -31,17 +31,23 @@ commutative: false documentation: !string |- // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // + // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // - go: Permute commutative: false documentation: !string |- // NAME performs a full permutation of vector x using indices: - // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // + // result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} + // - go: ConcatPermute # ConcatPermute is only available on or after AVX512 commutative: false documentation: !string |- // NAME performs a full permutation of vector x, y using indices: - // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // + // result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} + // // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. - go: Compress @@ -236,12 +242,12 @@ - go: ConcatShiftBytesRight commutative: false documentation: !string |- - // NAME concatenates x and y and shift it right by constant bytes. + // NAME concatenates x and y and shift it right by shift bytes. // The result vector will be the lower half of the concatenated vector. - go: ConcatShiftBytesRightGrouped commutative: false documentation: !string |- - // NAME concatenates x and y and shift it right by constant bytes. + // NAME concatenates x and y and shift it right by shift bytes. // The result vector will be the lower half of the concatenated vector. // This operation is performed grouped by each 16 byte. 
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml index 726a983ac4..e1fd184ed7 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml @@ -227,7 +227,7 @@ - go: Permute asm: "VPERMQ|VPERMPD" addDoc: !string |- - // The low 2 bits (values 0-3) of each element of indices is used + // The low 2 bits (values 0-3) of each element of indices is used. operandOrder: "21Type1" in: - &anyindices @@ -244,7 +244,7 @@ - go: Permute asm: "VPERM[WDQ]|VPERMP[SD]" addDoc: !string |- - // The low 3 bits (values 0-7) of each element of indices is used + // The low 3 bits (values 0-7) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -257,7 +257,7 @@ - go: Permute asm: "VPERM[BWD]|VPERMPS" addDoc: !string |- - // The low 4 bits (values 0-15) of each element of indices is used + // The low 4 bits (values 0-15) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -270,7 +270,7 @@ - go: Permute asm: "VPERM[BW]" addDoc: !string |- - // The low 5 bits (values 0-31) of each element of indices is used + // The low 5 bits (values 0-31) of each element of indices is used. operandOrder: "21Type1" in: - *anyindices @@ -283,7 +283,7 @@ - go: Permute asm: "VPERMB" addDoc: !string |- - // The low 6 bits (values 0-63) of each element of indices is used + // The low 6 bits (values 0-63) of each element of indices is used. 
operandOrder: "21Type1" in: - *anyindices @@ -489,7 +489,9 @@ - go: PermuteOrZeroGrouped asm: VPSHUFB addDoc: !string |- - // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} + // + // result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} + // // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -506,7 +508,9 @@ - go: permuteScalars asm: VPSHUFD addDoc: !string |- - // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} + // + // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. in: - *128any @@ -520,7 +524,9 @@ - go: permuteScalarsGrouped asm: VPSHUFD addDoc: !string |- - // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} + // + // result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. in: @@ -535,7 +541,9 @@ - go: permuteScalarsLo asm: VPSHUFLW addDoc: !string |- - // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} + // + // result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. 
in: - &128lanes8 @@ -573,7 +581,9 @@ - go: permuteScalarsHi asm: VPSHUFHW addDoc: !string |- - // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} + // + // result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} + // // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. in: - *128lanes8 @@ -1001,6 +1011,7 @@ - *uint128 - class: immediate immOffset: 0 + name: shift out: - *uint128 @@ -1014,5 +1025,6 @@ - *uint256512 - class: immediate immOffset: 0 + name: shift out: - *uint256512 diff --git a/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml index 92491b51d4..bb020ed48f 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Mul/categories.yaml @@ -7,7 +7,7 @@ commutative: true documentation: !string |- // NAME multiplies even-indexed elements, widening the result. - // Result[i] = v1.Even[i] * v2.Even[i]. + // Result[i] = v1[2*i] * v2[2*i]. - go: MulHigh commutative: true documentation: !string |- diff --git a/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml index 0d0b006cfb..0d205aab79 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/ShiftRotate/categories.yaml @@ -4,21 +4,21 @@ specialLower: sftimm commutative: false documentation: !string |- - // NAME shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. + // NAME shifts each element to the left by y bits. - go: ShiftAllRight signed: false nameAndSizeCheck: true specialLower: sftimm commutative: false documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
+ // NAME performs an unsigned right shift on each element by y bits. - go: ShiftAllRight signed: true specialLower: sftimm nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. + // NAME performs a signed right shift on each element by y bits. - go: shiftAllLeftConst # no APIs, only ssa ops. noTypes: "true" noGenericOps: "true" @@ -44,24 +44,24 @@ nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. + // NAME shifts each element in x to the left by the number of bits specified in y's corresponding elements. - go: ShiftRight signed: false nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. + // NAME performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. - go: ShiftRight signed: true nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. + // NAME performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. - go: RotateAllLeft nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME rotates each element to the left by the number of bits specified by the immediate. + // NAME rotates each element to the left by the number of bits specified by shift. 
- go: RotateLeft nameAndSizeCheck: true commutative: false @@ -71,7 +71,7 @@ nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME rotates each element to the right by the number of bits specified by the immediate. + // NAME rotates each element to the right by the number of bits specified by shift. - go: RotateRight nameAndSizeCheck: true commutative: false @@ -81,23 +81,23 @@ nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element of x to the left by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. + // NAME shifts each element of x to the left by the number of bits specified by + // shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. - go: ShiftAllRightConcat nameAndSizeCheck: true commutative: false documentation: !string |- - // NAME shifts each element of x to the right by the number of bits specified by the - // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. + // NAME shifts each element of x to the right by the number of bits specified by + // shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. - go: ShiftLeftConcat nameAndSizeCheck: true commutative: false documentation: !string |- // NAME shifts each element of x to the left by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. + // corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
- go: ShiftRightConcat nameAndSizeCheck: true commutative: false documentation: !string |- // NAME shifts each element of x to the right by the number of bits specified by the - // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. + // corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. diff --git a/src/simd/archsimd/_gen/tmplgen/main.go b/src/simd/archsimd/_gen/tmplgen/main.go index 473e4f14c0..8db185e1e0 100644 --- a/src/simd/archsimd/_gen/tmplgen/main.go +++ b/src/simd/archsimd/_gen/tmplgen/main.go @@ -40,17 +40,23 @@ func (sat shapeAndTemplate) target(outType string, width int) shapeAndTemplate { newSat := sat newShape := *sat.s newShape.output = func(t string, w, c int) (ot string, ow int, oc int) { - return outType, width, c + oc = c + if width*c > 512 { + oc = 512 / width + } else if width*c < 128 { + oc = 128 / width + } + return outType, width, oc } newSat.s = &newShape return newSat } -func (sat shapeAndTemplate) shrinkTo(outType string, by int) shapeAndTemplate { +func (sat shapeAndTemplate) targetFixed(outType string, width, count int) shapeAndTemplate { newSat := sat newShape := *sat.s newShape.output = func(t string, w, c int) (ot string, ow int, oc int) { - return outType, w / by, c * by + return outType, width, count } newSat.s = &newShape return newSat @@ -98,6 +104,17 @@ var uintShapes = &shapes{ uints: []int{8, 16, 32, 64}, } +var floatShapes = &shapes{ + vecs: []int{128, 256, 512}, + floats: []int{32, 64}, +} + +var integerShapes = &shapes{ + vecs: []int{128, 256, 512}, + ints: []int{8, 16, 32, 64}, + uints: []int{8, 16, 32, 64}, +} + var avx512Shapes = &shapes{ vecs: []int{512}, ints: []int{8, 16, 32, 64}, @@ -278,7 +295,7 @@ func testPrologue(t, s string, out io.Writer) { fmt.Fprintf(out, `// Code generated by '%s'; DO NOT EDIT. 
-//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing %s. // Each function in this file is specialized for a @@ -311,12 +328,12 @@ func shapedTemplateOf(s *shapes, name, temp string) shapeAndTemplate { } var sliceTemplate = templateOf("slice", ` -// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s +// Load{{.VType}}Slice loads {{.AOrAn}} {{.VType}} from a slice of at least {{.Count}} {{.Etype}}s. func Load{{.VType}}Slice(s []{{.Etype}}) {{.VType}} { return Load{{.VType}}((*[{{.Count}}]{{.Etype}})(s)) } -// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s +// StoreSlice stores x into a slice of at least {{.Count}} {{.Etype}}s. func (x {{.VType}}) StoreSlice(s []{{.Etype}}) { x.Store((*[{{.Count}}]{{.Etype}})(s)) } @@ -356,15 +373,49 @@ func test{{.VType}}UnaryFlaky(t *testing.T, f func(x archsimd.{{.VType}}) archsi `) var convertTemplate = templateOf("convert_helpers", ` -// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// test{{.VType}}ConvertTo{{.OEType}} tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) { n := {{.Count}} t.Helper() forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { t.Helper() a := archsimd.Load{{.VType}}Slice(x) - g := make([]{{.OEtype}}, n) + g := make([]{{.OEtype}}, {{.OCount}}) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)}) + }) +} +`) + +var ( + // templates and shapes for conversion. + // TODO: this includes shapes where in and out have the same element type, + // which are not needed. + unaryToInt8 = convertTemplate.target("int", 8) + unaryToUint8 = convertTemplate.target("uint", 8) + unaryToInt16 = convertTemplate.target("int", 16) + unaryToUint16 = convertTemplate.target("uint", 16) + unaryToInt32 = convertTemplate.target("int", 32) + unaryToUint32 = convertTemplate.target("uint", 32) + unaryToInt64 = convertTemplate.target("int", 64) + unaryToUint64 = convertTemplate.target("uint", 64) + unaryToFloat32 = convertTemplate.target("float", 32) + unaryToFloat64 = convertTemplate.target("float", 64) +) + +var convertLoTemplate = shapedTemplateOf(integerShapes, "convert_lo_helpers", ` +// test{{.VType}}ConvertLoTo{{.OVType}} tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low {{.OCount}} elements. 
+func test{{.VType}}ConvertLoTo{{.OVType}}(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.{{.OVType}}, want func(x []{{.Etype}}) []{{.OEtype}}) { + n := {{.Count}} + t.Helper() + forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { + t.Helper() + a := archsimd.Load{{.VType}}Slice(x) + g := make([]{{.OEtype}}, {{.OCount}}) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)}) @@ -372,9 +423,23 @@ func test{{.VType}}ConvertTo{{.OEType}}(t *testing.T, f func(x archsimd.{{.VType } `) -var unaryToInt32 = convertTemplate.target("int", 32) -var unaryToUint32 = convertTemplate.target("uint", 32) -var unaryToUint16 = convertTemplate.target("uint", 16) +var ( + // templates and shapes for conversion of low elements. + // The output is fixed to 128- or 256-bits (no 512-bit, as the + // regular convertTemplate covers that). + // TODO: this includes shapes where in and out have the same element + // type or length, which are not needed. 
+ unaryToInt64x2 = convertLoTemplate.targetFixed("int", 64, 2) + unaryToInt64x4 = convertLoTemplate.targetFixed("int", 64, 4) + unaryToUint64x2 = convertLoTemplate.targetFixed("uint", 64, 2) + unaryToUint64x4 = convertLoTemplate.targetFixed("uint", 64, 4) + unaryToInt32x4 = convertLoTemplate.targetFixed("int", 32, 4) + unaryToInt32x8 = convertLoTemplate.targetFixed("int", 32, 8) + unaryToUint32x4 = convertLoTemplate.targetFixed("uint", 32, 4) + unaryToUint32x8 = convertLoTemplate.targetFixed("uint", 32, 8) + unaryToInt16x8 = convertLoTemplate.targetFixed("int", 16, 8) + unaryToUint16x8 = convertLoTemplate.targetFixed("uint", 16, 8) +) var binaryTemplate = templateOf("binary_helpers", ` // test{{.VType}}Binary tests the simd binary method f against the expected behavior generated by want @@ -447,6 +512,22 @@ func test{{.VType}}Compare(t *testing.T, f func(_, _ archsimd.{{.VType}}) archsi } `) +var compareUnaryTemplate = shapedTemplateOf(floatShapes, "compare_unary_helpers", ` +// test{{.VType}}UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func test{{.VType}}UnaryCompare(t *testing.T, f func(x archsimd.{{.VType}}) archsimd.Mask{{.WxC}}, want func(x []{{.Etype}}) []int64) { + n := {{.Count}} + t.Helper() + forSlice(t, {{.Etype}}s, n, func(x []{{.Etype}}) bool { + t.Helper() + a := archsimd.Load{{.VType}}Slice(x) + g := make([]int{{.EWidth}}, n) + f(a).ToInt{{.WxC}}().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() {t.Helper(); t.Logf("x=%v", x)}) + }) +} +`) + // TODO this has not been tested yet. 
var compareMaskedTemplate = templateOf("comparemasked_helpers", ` // test{{.VType}}CompareMasked tests the simd masked comparison method f against the expected behavior generated by want @@ -580,32 +661,32 @@ func (t templateData) CPUfeature() string { } var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", ` -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return y.Greater(x).ToInt{{.WxC}}().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return x.Greater(y).ToInt{{.WxC}}().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { ones := x.Equal(x).ToInt{{.WxC}}() return x.Equal(y).ToInt{{.WxC}}().Xor(ones).asMask() @@ -613,18 +694,18 @@ func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { `) var bitWiseIntTemplate = shapedTemplateOf(intShapes, "bitwise int complement", ` -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Not() {{.VType}} { return x.Xor(x.Equal(x).ToInt{{.WxC}}()) } `) var bitWiseUintTemplate = shapedTemplateOf(uintShapes, "bitwise uint complement", ` -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) Not() {{.VType}} { return x.Xor(x.Equal(x).ToInt{{.WxC}}().As{{.VType}}()) } @@ -643,9 +724,9 @@ func (t templateData) CPUfeatureAVX2if8() string { } var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", ` -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .EWidth 8}} @@ -657,9 +738,9 @@ func (x {{.VType}}) Greater(y {{.VType}}) Mask{{.WxC}} { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() {{- if eq .EWidth 8}} @@ -671,9 +752,9 @@ func (x {{.VType}}) Less(y {{.VType}}) Mask{{.WxC}} { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -685,9 +766,9 @@ func (x {{.VType}}) GreaterEqual(y {{.VType}}) Mask{{.WxC}} { return b.Xor(signs).Greater(a.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature {{.CPUfeatureAVX2if8}} +// Emulated, CPU Feature: {{.CPUfeatureAVX2if8}} func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -699,9 +780,9 @@ func (x {{.VType}}) LessEqual(y {{.VType}}) Mask{{.WxC}} { return a.Xor(signs).Greater(b.Xor(signs)).ToInt{{.WxC}}().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature {{.CPUfeature}} +// Emulated, CPU Feature: {{.CPUfeature}} func (x {{.VType}}) NotEqual(y {{.VType}}) Mask{{.WxC}} { a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}() ones := x.Equal(x).ToInt{{.WxC}}() @@ -758,7 +839,7 @@ func (x {{.VType}}) Masked(mask Mask{{.WxC}}) {{.VType}} { {{- end -}} } -// Merge returns x but with elements set to y where m is false. 
+// Merge returns x but with elements set to y where mask is false. func (x {{.VType}}) Merge(y {{.VType}}, mask Mask{{.WxC}}) {{.VType}} { {{- if eq .Base "Int" }} return y.blendMasked(x, mask) @@ -789,7 +870,7 @@ var broadcastTemplate = templateOf("Broadcast functions", ` // Broadcast{{.VType}} returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature {{.CPUfeatureBC}} +// Emulated, CPU Feature: {{.CPUfeatureBC}} func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} { var z {{.As128BitVec }} return z.SetElem(0, x).Broadcast{{.Vwidth}}() @@ -804,7 +885,7 @@ func (from {{.Base}}{{.WxC}}) ToMask() (to Mask{{.WxC}}) { `) var stringTemplate = shapedTemplateOf(allShapes, "String methods", ` -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x {{.VType}}) String() string { var s [{{.Count}}]{{.Etype}} x.Store(&s) @@ -862,7 +943,17 @@ func main() { one(*ush, unsafePrologue, unsafePATemplate) } if *uh != "" { - one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, unaryToInt32, unaryToUint32, unaryToUint16, unaryFlakyTemplate) + one(*uh, curryTestPrologue("unary simd methods"), unaryTemplate, + unaryToInt8, unaryToUint8, unaryToInt16, unaryToUint16, + unaryToInt32, unaryToUint32, unaryToInt64, unaryToUint64, + unaryToFloat32, unaryToFloat64, + unaryToInt64x2, unaryToInt64x4, + unaryToUint64x2, unaryToUint64x4, + unaryToInt32x4, unaryToInt32x8, + unaryToUint32x4, unaryToUint32x8, + unaryToInt16x8, unaryToUint16x8, + unaryFlakyTemplate, + ) } if *bh != "" { one(*bh, curryTestPrologue("binary simd methods"), binaryTemplate) @@ -871,7 +962,7 @@ func main() { one(*th, curryTestPrologue("ternary simd methods"), ternaryTemplate, ternaryFlakyTemplate) } if *ch != "" { - one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate) + one(*ch, curryTestPrologue("simd methods that compare two operands"), compareTemplate, 
compareUnaryTemplate) } if *cmh != "" { one(*cmh, curryTestPrologue("simd methods that compare two operands under a mask"), compareMaskedTemplate) @@ -1018,7 +1109,7 @@ func nonTemplateRewrites(filename string, prologue func(s string, out io.Writer) out := new(bytes.Buffer) - prologue("go run genfiles.go", out) + prologue("tmplgen", out) for _, rewrite := range rewrites { rewrite(out) } @@ -1054,7 +1145,7 @@ func one(filename string, prologue func(s string, out io.Writer), sats ...shapeA out := new(bytes.Buffer) - prologue("go run genfiles.go", out) + prologue("tmplgen", out) for _, sat := range sats { sat.forTemplates(out) } diff --git a/src/simd/archsimd/compare_gen_amd64.go b/src/simd/archsimd/compare_gen_amd64.go index a8636f0b33..09f8277dc9 100644 --- a/src/simd/archsimd/compare_gen_amd64.go +++ b/src/simd/archsimd/compare_gen_amd64.go @@ -1,278 +1,278 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. //go:build goexperiment.simd package archsimd -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) Less(y Int8x16) Mask8x16 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return y.Greater(x).ToInt8x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) LessEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return x.Greater(y).ToInt8x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) NotEqual(y Int8x16) Mask8x16 { ones := x.Equal(x).ToInt8x16() return x.Equal(y).ToInt8x16().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) Less(y Int16x8) Mask16x8 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return y.Greater(x).ToInt16x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) LessEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return x.Greater(y).ToInt16x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) NotEqual(y Int16x8) Mask16x8 { ones := x.Equal(x).ToInt16x8() return x.Equal(y).ToInt16x8().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) Less(y Int32x4) Mask32x4 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return y.Greater(x).ToInt32x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) LessEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return x.Greater(y).ToInt32x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) NotEqual(y Int32x4) Mask32x4 { ones := x.Equal(x).ToInt32x4() return x.Equal(y).ToInt32x4().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) Less(y Int64x2) Mask64x2 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return y.Greater(x).ToInt64x2().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) LessEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return x.Greater(y).ToInt64x2().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) NotEqual(y Int64x2) Mask64x2 { ones := x.Equal(x).ToInt64x2() return x.Equal(y).ToInt64x2().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) Less(y Int8x32) Mask8x32 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return y.Greater(x).ToInt8x32().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) LessEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return x.Greater(y).ToInt8x32().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) NotEqual(y Int8x32) Mask8x32 { ones := x.Equal(x).ToInt8x32() return x.Equal(y).ToInt8x32().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) Less(y Int16x16) Mask16x16 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return y.Greater(x).ToInt16x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) LessEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return x.Greater(y).ToInt16x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) NotEqual(y Int16x16) Mask16x16 { ones := x.Equal(x).ToInt16x16() return x.Equal(y).ToInt16x16().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) Less(y Int32x8) Mask32x8 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return y.Greater(x).ToInt32x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) LessEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return x.Greater(y).ToInt32x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) NotEqual(y Int32x8) Mask32x8 { ones := x.Equal(x).ToInt32x8() return x.Equal(y).ToInt32x8().Xor(ones).asMask() } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) Less(y Int64x4) Mask64x4 { return y.Greater(x) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return y.Greater(x).ToInt64x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) LessEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return x.Greater(y).ToInt64x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) NotEqual(y Int64x4) Mask64x4 { ones := x.Equal(x).ToInt64x4() return x.Equal(y).ToInt64x4().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) Greater(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) Less(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() signs := BroadcastInt8x16(-1 << (8 - 1)) return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() @@ -280,9 +280,9 @@ func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() @@ -290,18 +290,18 @@ func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 { a, b := x.AsInt8x16(), y.AsInt8x16() ones := x.Equal(x).ToInt8x16() return a.Equal(b).ToInt8x16().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -309,9 +309,9 @@ func (x Uint16x8) Greater(y Uint16x8) Mask16x8 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Less(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -319,9 +319,9 @@ func (x Uint16x8) Less(y Uint16x8) Mask16x8 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -329,9 +329,9 @@ func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() @@ -339,18 +339,18 @@ func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 { a, b := x.AsInt16x8(), y.AsInt16x8() ones := x.Equal(x).ToInt16x8() return a.Equal(b).ToInt16x8().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -358,9 +358,9 @@ func (x Uint32x4) Greater(y Uint32x4) Mask32x4 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Less(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -368,9 +368,9 @@ func (x Uint32x4) Less(y Uint32x4) Mask32x4 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -378,9 +378,9 @@ func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() @@ -388,18 +388,18 @@ func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 { a, b := x.AsInt32x4(), y.AsInt32x4() ones := x.Equal(x).ToInt32x4() return a.Equal(b).ToInt32x4().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -407,9 +407,9 @@ func (x Uint64x2) Greater(y Uint64x2) Mask64x2 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Less(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -417,9 +417,9 @@ func (x Uint64x2) Less(y Uint64x2) Mask64x2 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -427,9 +427,9 @@ func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x2().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() @@ -437,36 +437,36 @@ func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x2().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 { a, b := x.AsInt64x2(), y.AsInt64x2() ones := x.Equal(x).ToInt64x2() return a.Equal(b).ToInt64x2().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Greater(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Less(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() signs := BroadcastInt8x32(-1 << (8 - 1)) return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() @@ -474,9 +474,9 @@ func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt8x32().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() @@ -484,18 +484,18 @@ func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt8x32().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 { a, b := x.AsInt8x32(), y.AsInt8x32() ones := x.Equal(x).ToInt8x32() return a.Equal(b).ToInt8x32().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -503,9 +503,9 @@ func (x Uint16x16) Greater(y Uint16x16) Mask16x16 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Less(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -513,9 +513,9 @@ func (x Uint16x16) Less(y Uint16x16) Mask16x16 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -523,9 +523,9 @@ func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt16x16().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() @@ -533,18 +533,18 @@ func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt16x16().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 { a, b := x.AsInt16x16(), y.AsInt16x16() ones := x.Equal(x).ToInt16x16() return a.Equal(b).ToInt16x16().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -552,9 +552,9 @@ func (x Uint32x8) Greater(y Uint32x8) Mask32x8 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Less(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -562,9 +562,9 @@ func (x Uint32x8) Less(y Uint32x8) Mask32x8 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -572,9 +572,9 @@ func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt32x8().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() @@ -582,18 +582,18 @@ func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt32x8().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 { a, b := x.AsInt32x8(), y.AsInt32x8() ones := x.Equal(x).ToInt32x8() return a.Equal(b).ToInt32x8().Xor(ones).asMask() } -// Greater returns a mask whose elements indicate whether x > y +// Greater returns a mask whose elements indicate whether x > y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -601,9 +601,9 @@ func (x Uint64x4) Greater(y Uint64x4) Mask64x4 { return a.Xor(signs).Greater(b.Xor(signs)) } -// Less returns a mask whose elements indicate whether x < y +// Less returns a mask whose elements indicate whether x < y. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Less(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -611,9 +611,9 @@ func (x Uint64x4) Less(y Uint64x4) Mask64x4 { return b.Xor(signs).Greater(a.Xor(signs)) } -// GreaterEqual returns a mask whose elements indicate whether x >= y +// GreaterEqual returns a mask whose elements indicate whether x >= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -621,9 +621,9 @@ func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 { return b.Xor(signs).Greater(a.Xor(signs)).ToInt64x4().Xor(ones).asMask() } -// LessEqual returns a mask whose elements indicate whether x <= y +// LessEqual returns a mask whose elements indicate whether x <= y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() @@ -631,9 +631,9 @@ func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 { return a.Xor(signs).Greater(b.Xor(signs)).ToInt64x4().Xor(ones).asMask() } -// NotEqual returns a mask whose elements indicate whether x != y +// NotEqual returns a mask whose elements indicate whether x != y. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 { a, b := x.AsInt64x4(), y.AsInt64x4() ones := x.Equal(x).ToInt64x4() diff --git a/src/simd/archsimd/cpu.go b/src/simd/archsimd/cpu.go index bb0ebbc16a..d0c0ff5426 100644 --- a/src/simd/archsimd/cpu.go +++ b/src/simd/archsimd/cpu.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
+// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. //go:build goexperiment.simd diff --git a/src/simd/archsimd/extra_amd64.go b/src/simd/archsimd/extra_amd64.go index 921e148f63..b0dba6d234 100644 --- a/src/simd/archsimd/extra_amd64.go +++ b/src/simd/archsimd/extra_amd64.go @@ -19,7 +19,7 @@ func ClearAVXUpperBits() // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int8x16) IsZero() bool @@ -27,7 +27,7 @@ func (x Int8x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int8x32) IsZero() bool @@ -35,7 +35,7 @@ func (x Int8x32) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int16x8) IsZero() bool @@ -43,7 +43,7 @@ func (x Int16x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int16x16) IsZero() bool @@ -51,7 +51,7 @@ func (x Int16x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. 
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int32x4) IsZero() bool @@ -59,7 +59,7 @@ func (x Int32x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int32x8) IsZero() bool @@ -67,7 +67,7 @@ func (x Int32x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int64x2) IsZero() bool @@ -75,7 +75,7 @@ func (x Int64x2) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Int64x4) IsZero() bool @@ -83,7 +83,7 @@ func (x Int64x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint8x16) IsZero() bool @@ -91,7 +91,7 @@ func (x Uint8x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. 
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint8x32) IsZero() bool @@ -99,7 +99,7 @@ func (x Uint8x32) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint16x8) IsZero() bool @@ -107,7 +107,7 @@ func (x Uint16x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint16x16) IsZero() bool @@ -115,7 +115,7 @@ func (x Uint16x16) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint32x4) IsZero() bool @@ -123,7 +123,7 @@ func (x Uint32x4) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint32x8) IsZero() bool @@ -131,7 +131,7 @@ func (x Uint32x8) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. 
-// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint64x2) IsZero() bool @@ -139,7 +139,43 @@ func (x Uint64x2) IsZero() bool // IsZero returns true if all elements of x are zeros. // // This method compiles to VPTEST x, x. -// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y +// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y. // // Asm: VPTEST, CPU Feature: AVX func (x Uint64x4) IsZero() bool + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) IsNaN() Mask32x4 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) IsNaN() Mask32x8 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) IsNaN() Mask32x16 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) IsNaN() Mask64x2 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) IsNaN() Mask64x4 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. 
+// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) IsNaN() Mask64x8 diff --git a/src/simd/archsimd/internal/simd_test/binary_helpers_test.go b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go index 9c361dbeb9..c725f657f3 100644 --- a/src/simd/archsimd/internal/simd_test/binary_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/binary_helpers_test.go @@ -1,6 +1,6 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing binary simd methods. // Each function in this file is specialized for a diff --git a/src/simd/archsimd/internal/simd_test/binary_test.go b/src/simd/archsimd/internal/simd_test/binary_test.go index fa2b9511ca..28efdcb52f 100644 --- a/src/simd/archsimd/internal/simd_test/binary_test.go +++ b/src/simd/archsimd/internal/simd_test/binary_test.go @@ -17,23 +17,29 @@ func TestAdd(t *testing.T) { testFloat64x2Binary(t, archsimd.Float64x2.Add, addSlice[float64]) testFloat64x4Binary(t, archsimd.Float64x4.Add, addSlice[float64]) - testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Add, addSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Add, addSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32]) testInt64x2Binary(t, archsimd.Int64x2.Add, addSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.Add, addSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16]) - testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.Add, addSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32]) testUint64x2Binary(t, archsimd.Uint64x2.Add, addSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.Add, 
addSlice[uint64]) + testUint16x8Binary(t, archsimd.Uint16x8.Add, addSlice[uint16]) testUint8x16Binary(t, archsimd.Uint8x16.Add, addSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Add, addSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Add, addSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.Add, addSlice[uint64]) + testUint8x32Binary(t, archsimd.Uint8x32.Add, addSlice[uint8]) + } + + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Add, addSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Add, addSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.Add, addSlice[int64]) + testInt8x32Binary(t, archsimd.Int8x32.Add, addSlice[int8]) + } if archsimd.X86.AVX512() { testFloat32x16Binary(t, archsimd.Float32x16.Add, addSlice[float32]) @@ -55,23 +61,29 @@ func TestSub(t *testing.T) { testFloat64x2Binary(t, archsimd.Float64x2.Sub, subSlice[float64]) testFloat64x4Binary(t, archsimd.Float64x4.Sub, subSlice[float64]) - testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16]) - testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Sub, subSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32]) + testInt16x8Binary(t, archsimd.Int16x8.Sub, subSlice[int16]) testInt64x2Binary(t, archsimd.Int64x2.Sub, subSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.Sub, subSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Sub, subSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16]) - testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Sub, subSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Sub, subSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.Sub, subSlice[int64]) + testInt8x32Binary(t, 
archsimd.Int8x32.Sub, subSlice[int8]) + } + testUint32x4Binary(t, archsimd.Uint32x4.Sub, subSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32]) + testUint16x8Binary(t, archsimd.Uint16x8.Sub, subSlice[uint16]) testUint64x2Binary(t, archsimd.Uint64x2.Sub, subSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64]) testUint8x16Binary(t, archsimd.Uint8x16.Sub, subSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Sub, subSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Sub, subSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.Sub, subSlice[uint64]) + testUint8x32Binary(t, archsimd.Uint8x32.Sub, subSlice[uint8]) + } if archsimd.X86.AVX512() { testFloat32x16Binary(t, archsimd.Float32x16.Sub, subSlice[float32]) @@ -93,10 +105,13 @@ func TestMax(t *testing.T) { // testFloat64x2Binary(t, archsimd.Float64x2.Max, maxSlice[float64]) // nan is wrong // testFloat64x4Binary(t, archsimd.Float64x4.Max, maxSlice[float64]) // nan is wrong - testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Max, maxSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Max, maxSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32]) + + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Max, maxSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Max, maxSlice[int32]) + } if archsimd.X86.AVX512() { testInt64x2Binary(t, archsimd.Int64x2.Max, maxSlice[int64]) @@ -104,12 +119,18 @@ func TestMax(t *testing.T) { } testInt8x16Binary(t, archsimd.Int8x16.Max, maxSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16]) + if archsimd.X86.AVX2() { + testInt8x32Binary(t, archsimd.Int8x32.Max, maxSlice[int8]) + } + testUint16x8Binary(t, archsimd.Uint16x8.Max, maxSlice[uint16]) 
testUint32x4Binary(t, archsimd.Uint32x4.Max, maxSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Max, maxSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Max, maxSlice[uint32]) + } if archsimd.X86.AVX512() { testUint64x2Binary(t, archsimd.Uint64x2.Max, maxSlice[uint64]) @@ -117,7 +138,10 @@ func TestMax(t *testing.T) { } testUint8x16Binary(t, archsimd.Uint8x16.Max, maxSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint8x32Binary(t, archsimd.Uint8x32.Max, maxSlice[uint8]) + } if archsimd.X86.AVX512() { // testFloat32x16Binary(t, archsimd.Float32x16.Max, maxSlice[float32]) // nan is wrong @@ -139,10 +163,13 @@ func TestMin(t *testing.T) { // testFloat64x2Binary(t, archsimd.Float64x2.Min, minSlice[float64]) // nan is wrong // testFloat64x4Binary(t, archsimd.Float64x4.Min, minSlice[float64]) // nan is wrong - testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Min, minSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Min, minSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32]) + + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Min, minSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Min, minSlice[int32]) + } if archsimd.X86.AVX512() { testInt64x2Binary(t, archsimd.Int64x2.Min, minSlice[int64]) @@ -150,12 +177,18 @@ func TestMin(t *testing.T) { } testInt8x16Binary(t, archsimd.Int8x16.Min, minSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16]) + if archsimd.X86.AVX2() { + testInt8x32Binary(t, archsimd.Int8x32.Min, minSlice[int8]) + } + testUint16x8Binary(t, archsimd.Uint16x8.Min, minSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.Min, minSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Min, 
minSlice[uint32]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Min, minSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Min, minSlice[uint32]) + } if archsimd.X86.AVX512() { testUint64x2Binary(t, archsimd.Uint64x2.Min, minSlice[uint64]) @@ -163,7 +196,10 @@ func TestMin(t *testing.T) { } testUint8x16Binary(t, archsimd.Uint8x16.Min, minSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint8x32Binary(t, archsimd.Uint8x32.Min, minSlice[uint8]) + } if archsimd.X86.AVX512() { // testFloat32x16Binary(t, archsimd.Float32x16.Min, minSlice[float32]) // nan is wrong @@ -180,23 +216,29 @@ func TestMin(t *testing.T) { } func TestAnd(t *testing.T) { - testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.And, andSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.And, andSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32]) testInt64x2Binary(t, archsimd.Int64x2.And, andSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.And, andSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.And, andSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.And, andSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.And, andSlice[int64]) + testInt8x32Binary(t, archsimd.Int8x32.And, andSlice[int8]) + } + testUint16x8Binary(t, archsimd.Uint16x8.And, andSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.And, andSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32]) testUint64x2Binary(t, archsimd.Uint64x2.And, andSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64]) testUint8x16Binary(t, archsimd.Uint8x16.And, andSlice[uint8]) - testUint8x32Binary(t, 
archsimd.Uint8x32.And, andSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.And, andSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.And, andSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.And, andSlice[uint64]) + testUint8x32Binary(t, archsimd.Uint8x32.And, andSlice[uint8]) + } if archsimd.X86.AVX512() { // testInt8x64Binary(t, archsimd.Int8x64.And, andISlice[int8]) // missing @@ -211,23 +253,29 @@ func TestAnd(t *testing.T) { } func TestAndNot(t *testing.T) { - testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.AndNot, andNotSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.AndNot, andNotSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32]) testInt64x2Binary(t, archsimd.Int64x2.AndNot, andNotSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.AndNot, andNotSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.AndNot, andNotSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.AndNot, andNotSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.AndNot, andNotSlice[int64]) + testInt8x32Binary(t, archsimd.Int8x32.AndNot, andNotSlice[int8]) + } + + testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8]) testUint16x8Binary(t, archsimd.Uint16x8.AndNot, andNotSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.AndNot, andNotSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32]) testUint64x2Binary(t, archsimd.Uint64x2.AndNot, andNotSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64]) - testUint8x16Binary(t, archsimd.Uint8x16.AndNot, andNotSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8]) + + 
if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.AndNot, andNotSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.AndNot, andNotSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.AndNot, andNotSlice[uint64]) + testUint8x32Binary(t, archsimd.Uint8x32.AndNot, andNotSlice[uint8]) + } if archsimd.X86.AVX512() { testInt8x64Binary(t, archsimd.Int8x64.AndNot, andNotSlice[int8]) @@ -242,23 +290,29 @@ func TestAndNot(t *testing.T) { } func TestXor(t *testing.T) { - testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Xor, xorSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Xor, xorSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32]) testInt64x2Binary(t, archsimd.Int64x2.Xor, xorSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.Xor, xorSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Xor, xorSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Xor, xorSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.Xor, xorSlice[int64]) + testInt8x32Binary(t, archsimd.Int8x32.Xor, xorSlice[int8]) + } + testUint16x8Binary(t, archsimd.Uint16x8.Xor, xorSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.Xor, xorSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32]) testUint64x2Binary(t, archsimd.Uint64x2.Xor, xorSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64]) testUint8x16Binary(t, archsimd.Uint8x16.Xor, xorSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Xor, xorSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Xor, xorSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.Xor, xorSlice[uint64]) 
+ testUint8x32Binary(t, archsimd.Uint8x32.Xor, xorSlice[uint8]) + } if archsimd.X86.AVX512() { // testInt8x64Binary(t, archsimd.Int8x64.Xor, andISlice[int8]) // missing @@ -273,23 +327,29 @@ func TestXor(t *testing.T) { } func TestOr(t *testing.T) { - testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Or, orSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Or, orSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32]) testInt64x2Binary(t, archsimd.Int64x2.Or, orSlice[int64]) - testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64]) testInt8x16Binary(t, archsimd.Int8x16.Or, orSlice[int8]) - testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8]) - testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Or, orSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Or, orSlice[int32]) + testInt64x4Binary(t, archsimd.Int64x4.Or, orSlice[int64]) + testInt8x32Binary(t, archsimd.Int8x32.Or, orSlice[int8]) + } + testUint16x8Binary(t, archsimd.Uint16x8.Or, orSlice[uint16]) testUint32x4Binary(t, archsimd.Uint32x4.Or, orSlice[uint32]) - testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32]) testUint64x2Binary(t, archsimd.Uint64x2.Or, orSlice[uint64]) - testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64]) testUint8x16Binary(t, archsimd.Uint8x16.Or, orSlice[uint8]) - testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Binary(t, archsimd.Uint16x16.Or, orSlice[uint16]) + testUint32x8Binary(t, archsimd.Uint32x8.Or, orSlice[uint32]) + testUint64x4Binary(t, archsimd.Uint64x4.Or, orSlice[uint64]) + testUint8x32Binary(t, archsimd.Uint8x32.Or, orSlice[uint8]) + } if archsimd.X86.AVX512() { // testInt8x64Binary(t, archsimd.Int8x64.Or, andISlice[int8]) // missing @@ -309,10 +369,13 @@ func TestMul(t *testing.T) { testFloat64x2Binary(t, archsimd.Float64x2.Mul, mulSlice[float64]) 
testFloat64x4Binary(t, archsimd.Float64x4.Mul, mulSlice[float64]) - testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16]) testInt16x8Binary(t, archsimd.Int16x8.Mul, mulSlice[int16]) testInt32x4Binary(t, archsimd.Int32x4.Mul, mulSlice[int32]) - testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32]) + + if archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.Mul, mulSlice[int16]) + testInt32x8Binary(t, archsimd.Int32x8.Mul, mulSlice[int32]) + } // testInt8x16Binary(t, archsimd.Int8x16.Mul, mulSlice[int8]) // nope // testInt8x32Binary(t, archsimd.Int8x32.Mul, mulSlice[int8]) diff --git a/src/simd/archsimd/internal/simd_test/compare_helpers_test.go b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go index 279fdc7155..7a33f0ffa4 100644 --- a/src/simd/archsimd/internal/simd_test/compare_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/compare_helpers_test.go @@ -1,6 +1,6 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing simd methods that compare two operands. 
// Each function in this file is specialized for a @@ -462,3 +462,87 @@ func testFloat64x8Compare(t *testing.T, f func(_, _ archsimd.Float64x8) archsimd return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x); t.Logf("y=%v", y) }) }) } + +// testFloat32x4UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func testFloat32x4UnaryCompare(t *testing.T, f func(x archsimd.Float32x4) archsimd.Mask32x4, want func(x []float32) []int64) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]int32, n) + f(a).ToInt32x4().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func testFloat64x2UnaryCompare(t *testing.T, f func(x archsimd.Float64x2) archsimd.Mask64x2, want func(x []float64) []int64) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]int64, n) + f(a).ToInt64x2().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func testFloat32x8UnaryCompare(t *testing.T, f func(x archsimd.Float32x8) archsimd.Mask32x8, want func(x []float32) []int64) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]int32, n) + f(a).ToInt32x8().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4UnaryCompare tests the simd unary comparison method f against the expected behavior 
generated by want +func testFloat64x4UnaryCompare(t *testing.T, f func(x archsimd.Float64x4) archsimd.Mask64x4, want func(x []float64) []int64) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]int64, n) + f(a).ToInt64x4().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func testFloat32x16UnaryCompare(t *testing.T, f func(x archsimd.Float32x16) archsimd.Mask32x16, want func(x []float32) []int64) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]int32, n) + f(a).ToInt32x16().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8UnaryCompare tests the simd unary comparison method f against the expected behavior generated by want +func testFloat64x8UnaryCompare(t *testing.T, f func(x archsimd.Float64x8) archsimd.Mask64x8, want func(x []float64) []int64) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]int64, n) + f(a).ToInt64x8().StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, s64(g), w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} diff --git a/src/simd/archsimd/internal/simd_test/compare_test.go b/src/simd/archsimd/internal/simd_test/compare_test.go index 4485e9bdaa..ea8514ac93 100644 --- a/src/simd/archsimd/internal/simd_test/compare_test.go +++ b/src/simd/archsimd/internal/simd_test/compare_test.go @@ -21,32 +21,39 @@ func TestLess(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.Less, lessSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.Less, lessSlice[float64]) 
- testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8]) - testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16]) + if archsimd.X86.AVX2() { + testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8]) + + testInt16x16Compare(t, archsimd.Int16x16.Less, lessSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8]) + } + testInt16x8Compare(t, archsimd.Int16x8.Less, lessSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.Less, lessSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.Less, lessSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.Less, lessSlice[int8]) - testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16]) testUint16x8Compare(t, archsimd.Uint16x8.Less, lessSlice[uint16]) testUint32x4Compare(t, archsimd.Uint32x4.Less, lessSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32]) testUint64x2Compare(t, archsimd.Uint64x2.Less, lessSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64]) 
testUint8x16Compare(t, archsimd.Uint8x16.Less, lessSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.Less, lessSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.Less, lessSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.Less, lessSlice[uint8]) + } if archsimd.X86.AVX512() { testUint16x16Compare(t, archsimd.Uint16x16.Less, lessSlice[uint16]) @@ -77,23 +84,29 @@ func TestLessEqual(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.LessEqual, lessEqualSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.LessEqual, lessEqualSlice[float64]) - testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.LessEqual, lessEqualSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.LessEqual, lessEqualSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.LessEqual, lessEqualSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.LessEqual, lessEqualSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8]) - testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Compare(t, archsimd.Int16x16.LessEqual, lessEqualSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.LessEqual, lessEqualSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.LessEqual, lessEqualSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.LessEqual, lessEqualSlice[int8]) + } + testUint16x8Compare(t, archsimd.Uint16x8.LessEqual, lessEqualSlice[uint16]) testUint32x4Compare(t, archsimd.Uint32x4.LessEqual, lessEqualSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32]) 
testUint64x2Compare(t, archsimd.Uint64x2.LessEqual, lessEqualSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64]) testUint8x16Compare(t, archsimd.Uint8x16.LessEqual, lessEqualSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.LessEqual, lessEqualSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.LessEqual, lessEqualSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.LessEqual, lessEqualSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.LessEqual, lessEqualSlice[uint8]) + } if archsimd.X86.AVX512() { testFloat32x16Compare(t, archsimd.Float32x16.LessEqual, lessEqualSlice[float32]) @@ -115,25 +128,29 @@ func TestGreater(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.Greater, greaterSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.Greater, greaterSlice[float64]) - testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.Greater, greaterSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.Greater, greaterSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32]) - testInt64x2Compare(t, archsimd.Int64x2.Greater, greaterSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.Greater, greaterSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8]) - testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Compare(t, archsimd.Int16x16.Greater, greaterSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.Greater, greaterSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.Greater, greaterSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.Greater, greaterSlice[int8]) + } + testUint16x8Compare(t, archsimd.Uint16x8.Greater, greaterSlice[uint16]) 
testUint32x4Compare(t, archsimd.Uint32x4.Greater, greaterSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32]) - testUint64x2Compare(t, archsimd.Uint64x2.Greater, greaterSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64]) testUint8x16Compare(t, archsimd.Uint8x16.Greater, greaterSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.Greater, greaterSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.Greater, greaterSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.Greater, greaterSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.Greater, greaterSlice[uint8]) + } if archsimd.X86.AVX512() { @@ -156,23 +173,29 @@ func TestGreaterEqual(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.GreaterEqual, greaterEqualSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.GreaterEqual, greaterEqualSlice[float64]) - testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.GreaterEqual, greaterEqualSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.GreaterEqual, greaterEqualSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.GreaterEqual, greaterEqualSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, greaterEqualSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.GreaterEqual, greaterEqualSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8]) - testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Compare(t, archsimd.Int16x16.GreaterEqual, greaterEqualSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.GreaterEqual, greaterEqualSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.GreaterEqual, 
greaterEqualSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.GreaterEqual, greaterEqualSlice[int8]) + } + testUint16x8Compare(t, archsimd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16]) testUint32x4Compare(t, archsimd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32]) testUint64x2Compare(t, archsimd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64]) testUint8x16Compare(t, archsimd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8]) + } if archsimd.X86.AVX512() { testFloat32x16Compare(t, archsimd.Float32x16.GreaterEqual, greaterEqualSlice[float32]) @@ -194,23 +217,29 @@ func TestEqual(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.Equal, equalSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.Equal, equalSlice[float64]) - testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.Equal, equalSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.Equal, equalSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.Equal, equalSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.Equal, equalSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8]) - testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16]) + if archsimd.X86.AVX2() { + 
testInt16x16Compare(t, archsimd.Int16x16.Equal, equalSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.Equal, equalSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.Equal, equalSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.Equal, equalSlice[int8]) + } + testUint16x8Compare(t, archsimd.Uint16x8.Equal, equalSlice[uint16]) testUint32x4Compare(t, archsimd.Uint32x4.Equal, equalSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32]) testUint64x2Compare(t, archsimd.Uint64x2.Equal, equalSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64]) testUint8x16Compare(t, archsimd.Uint8x16.Equal, equalSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.Equal, equalSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.Equal, equalSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.Equal, equalSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.Equal, equalSlice[uint8]) + } if archsimd.X86.AVX512() { testFloat32x16Compare(t, archsimd.Float32x16.Equal, equalSlice[float32]) @@ -232,23 +261,29 @@ func TestNotEqual(t *testing.T) { testFloat64x2Compare(t, archsimd.Float64x2.NotEqual, notEqualSlice[float64]) testFloat64x4Compare(t, archsimd.Float64x4.NotEqual, notEqualSlice[float64]) - testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16]) testInt16x8Compare(t, archsimd.Int16x8.NotEqual, notEqualSlice[int16]) testInt32x4Compare(t, archsimd.Int32x4.NotEqual, notEqualSlice[int32]) - testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32]) testInt64x2Compare(t, archsimd.Int64x2.NotEqual, notEqualSlice[int64]) - testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64]) testInt8x16Compare(t, archsimd.Int8x16.NotEqual, notEqualSlice[int8]) - testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8]) - testUint16x16Compare(t, 
archsimd.Uint16x16.NotEqual, notEqualSlice[uint16]) + if archsimd.X86.AVX2() { + testInt16x16Compare(t, archsimd.Int16x16.NotEqual, notEqualSlice[int16]) + testInt32x8Compare(t, archsimd.Int32x8.NotEqual, notEqualSlice[int32]) + testInt64x4Compare(t, archsimd.Int64x4.NotEqual, notEqualSlice[int64]) + testInt8x32Compare(t, archsimd.Int8x32.NotEqual, notEqualSlice[int8]) + } + testUint16x8Compare(t, archsimd.Uint16x8.NotEqual, notEqualSlice[uint16]) testUint32x4Compare(t, archsimd.Uint32x4.NotEqual, notEqualSlice[uint32]) - testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32]) testUint64x2Compare(t, archsimd.Uint64x2.NotEqual, notEqualSlice[uint64]) - testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64]) testUint8x16Compare(t, archsimd.Uint8x16.NotEqual, notEqualSlice[uint8]) - testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8]) + + if archsimd.X86.AVX2() { + testUint16x16Compare(t, archsimd.Uint16x16.NotEqual, notEqualSlice[uint16]) + testUint32x8Compare(t, archsimd.Uint32x8.NotEqual, notEqualSlice[uint32]) + testUint64x4Compare(t, archsimd.Uint64x4.NotEqual, notEqualSlice[uint64]) + testUint8x32Compare(t, archsimd.Uint8x32.NotEqual, notEqualSlice[uint8]) + } if archsimd.X86.AVX512() { testFloat32x16Compare(t, archsimd.Float32x16.NotEqual, notEqualSlice[float32]) @@ -263,3 +298,49 @@ func TestNotEqual(t *testing.T) { testUint64x8Compare(t, archsimd.Uint64x8.NotEqual, notEqualSlice[uint64]) } } + +func TestIsNaN(t *testing.T) { + testFloat32x4UnaryCompare(t, archsimd.Float32x4.IsNaN, isNaNSlice[float32]) + testFloat32x8UnaryCompare(t, archsimd.Float32x8.IsNaN, isNaNSlice[float32]) + testFloat64x2UnaryCompare(t, archsimd.Float64x2.IsNaN, isNaNSlice[float64]) + testFloat64x4UnaryCompare(t, archsimd.Float64x4.IsNaN, isNaNSlice[float64]) + + if archsimd.X86.AVX512() { + testFloat32x16UnaryCompare(t, archsimd.Float32x16.IsNaN, isNaNSlice[float32]) + testFloat64x8UnaryCompare(t, archsimd.Float64x8.IsNaN, 
isNaNSlice[float64]) + } + + // Test x.IsNaN().Or(y.IsNaN()), which is optimized to VCMPP(S|D) $3, x, y. + want32 := mapCompare(func(x, y float32) bool { return x != x || y != y }) + want64 := mapCompare(func(x, y float64) bool { return x != x || y != y }) + testFloat32x4Compare(t, + func(x, y archsimd.Float32x4) archsimd.Mask32x4 { + return x.IsNaN().Or(y.IsNaN()) + }, want32) + testFloat64x2Compare(t, + func(x, y archsimd.Float64x2) archsimd.Mask64x2 { + return x.IsNaN().Or(y.IsNaN()) + }, want64) + + if archsimd.X86.AVX2() { + testFloat32x8Compare(t, + func(x, y archsimd.Float32x8) archsimd.Mask32x8 { + return x.IsNaN().Or(y.IsNaN()) + }, want32) + testFloat64x4Compare(t, + func(x, y archsimd.Float64x4) archsimd.Mask64x4 { + return x.IsNaN().Or(y.IsNaN()) + }, want64) + } + + if archsimd.X86.AVX512() { + testFloat32x16Compare(t, + func(x, y archsimd.Float32x16) archsimd.Mask32x16 { + return x.IsNaN().Or(y.IsNaN()) + }, want32) + testFloat64x8Compare(t, + func(x, y archsimd.Float64x8) archsimd.Mask64x8 { + return x.IsNaN().Or(y.IsNaN()) + }, want64) + } +} diff --git a/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go index 7ceee652a9..c7197568ed 100644 --- a/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/comparemasked_helpers_test.go @@ -1,6 +1,6 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing simd methods that compare two operands under a mask. 
// Each function in this file is specialized for a diff --git a/src/simd/archsimd/internal/simd_test/generate.go b/src/simd/archsimd/internal/simd_test/generate.go index e744a5299f..4bc24fea2d 100644 --- a/src/simd/archsimd/internal/simd_test/generate.go +++ b/src/simd/archsimd/internal/simd_test/generate.go @@ -7,5 +7,6 @@ package simd // Invoke code generators. +// The test helpers are generated by tmplgen from the archsimd package. -//go:generate go run -C ../.. genfiles.go +//go:generate go -C ../.. generate diff --git a/src/simd/archsimd/internal/simd_test/helpers_test.go b/src/simd/archsimd/internal/simd_test/helpers_test.go index b9d5098dba..d6963586c0 100644 --- a/src/simd/archsimd/internal/simd_test/helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/helpers_test.go @@ -126,8 +126,22 @@ func map1[T, U any](elem func(x T) U) func(x []T) []U { } } -// map1 returns a function that returns the slice of the results of applying -// comparison function elem to the respective elements of its two slice inputs. +// map1n returns a function that returns the slice of the results of applying +// input parameter elem to the respective elements of its single slice input, +// extended (with zero values) or truncated to length n. +func map1n[T, U any](elem func(x T) U, n int) func(x []T) []U { + return func(x []T) []U { + s := make([]U, n) + for i := range min(len(x), n) { + s[i] = elem(x[i]) + } + return s + } +} + +// mapCompare returns a function that returns the slice of the results of applying +// comparison function elem to the respective elements of its two slice inputs, +// and returns -1 if the comparison is true, 0 otherwise. 
func mapCompare[T number](elem func(x, y T) bool) func(x, y []T) []int64 { return func(x, y []T) []int64 { s := make([]int64, len(x)) @@ -168,12 +182,14 @@ var nzero = -zero var inf = 1 / zero var ninf = -1 / zero var nan = math.NaN() +var snan32 = math.Float32frombits(0x7f800001) +var snan64 = math.Float64frombits(0x7ff0000000000001) // N controls how large the test vectors are const N = 144 -var float32s = nOf(N, []float32{float32(inf), float32(ninf), 1, float32(nan), float32(zero), 2, float32(nan), float32(zero), 3, float32(-zero), float32(1.0 / zero), float32(-1.0 / zero), 1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 1000, 1.0 / 1000000, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat32, 1 / math.MaxFloat32, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -160, -3200, -64, -4, -8, -16, -32, -64}) -var float64s = nOf(N, []float64{inf, ninf, nan, zero, -zero, 1 / zero, -1 / zero, 0.0001, 0.0000001, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat64, 1.0 / math.MaxFloat64, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -16, -32, -64}) +var float32s = nOf(N, []float32{float32(inf), float32(ninf), 1, float32(nan), snan32, -float32(nan), -snan32, float32(zero), 2, float32(nan), float32(zero), 3, float32(-zero), float32(1.0 / zero), float32(-1.0 / zero), 1.0 / 2, 1.0 / 4, 1.0 / 8, 1.0 / 1000, 1.0 / 1000000, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat32, 1 / math.MaxFloat32, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -160, -3200, -64, -4, -8, -16, -32, -64}) +var float64s = nOf(N, []float64{inf, ninf, nan, snan64, -nan, -snan64, zero, -zero, 1 / zero, -1 / zero, 0.0001, 0.0000001, 1, -1, 0, 2, -2, 3, -3, math.MaxFloat64, 1.0 / math.MaxFloat64, 10, -10, 100, 20, -20, 300, -300, -4000, -80, -16, -32, -64}) var int32s = nOf(N, []int32{1, -1, 0, 2, 4, 8, 1024, 0xffffff, -0xffffff, 0x55555, 0x77777, 0xccccc, -0x55555, -0x77777, -0xccccc, -4, -8, -16, -32, -64}) var uint32s = nOf(N, []uint32{1, 0, 2, 4, 8, 1024, 0xffffff, ^uint32(0xffffff), 0x55555, 0x77777, 0xccccc, ^uint32(0x55555), ^uint32(0x77777), 
^uint32(0xccccc)}) diff --git a/src/simd/archsimd/internal/simd_test/simd_test.go b/src/simd/archsimd/internal/simd_test/simd_test.go index 83925ae789..36bde92455 100644 --- a/src/simd/archsimd/internal/simd_test/simd_test.go +++ b/src/simd/archsimd/internal/simd_test/simd_test.go @@ -13,6 +13,7 @@ import ( "simd/archsimd" "slices" "testing" + "unsafe" ) func TestMain(m *testing.M) { @@ -225,6 +226,10 @@ func TestShiftAll(t *testing.T) { } func TestSlicesInt8(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} v := archsimd.LoadInt8x32Slice(a) @@ -258,6 +263,10 @@ func TestSlicesInt8GetElem(t *testing.T) { } func TestSlicesInt8TooShortLoad(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } defer func() { if r := recover(); r != nil { t.Logf("Saw EXPECTED panic %v", r) @@ -274,6 +283,10 @@ func TestSlicesInt8TooShortLoad(t *testing.T) { } func TestSlicesInt8TooShortStore(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } defer func() { if r := recover(); r != nil { t.Logf("Saw EXPECTED panic %v", r) @@ -303,6 +316,10 @@ func TestSlicesFloat64(t *testing.T) { // TODO: try to reduce this test to be smaller. 
func TestMergeLocals(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } testMergeLocalswrapper(t, archsimd.Int64x4.Add) } @@ -362,12 +379,79 @@ func TestBitMaskFromBitsLoad(t *testing.T) { } func TestBitMaskToBits(t *testing.T) { - if !archsimd.X86.AVX512() { - t.Skip("Test requires X86.AVX512, not available on this hardware") - return + int8s := []int8{ + 0, 1, 1, 0, 0, 1, 0, 1, + 1, 0, 1, 1, 0, 0, 1, 0, + 1, 0, 0, 1, 1, 0, 1, 0, + 0, 1, 1, 0, 0, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 0, 1, 1, 0, 0, 1, + 1, 0, 1, 0, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, + } + int16s := make([]int16, 32) + for i := range int16s { + int16s[i] = int16(int8s[i]) + } + int32s := make([]int32, 16) + for i := range int32s { + int32s[i] = int32(int8s[i]) + } + int64s := make([]int64, 8) + for i := range int64s { + int64s[i] = int64(int8s[i]) + } + want64 := uint64(0) + for i := range int8s { + want64 |= uint64(int8s[i]) << i + } + want32 := uint32(want64) + want16 := uint16(want64) + want8 := uint8(want64) + want4 := want8 & 0b1111 + want2 := want4 & 0b11 + + if v := archsimd.LoadInt8x16Slice(int8s[:16]).ToMask().ToBits(); v != want16 { + t.Errorf("want %b, got %b", want16, v) + } + if v := archsimd.LoadInt32x4Slice(int32s[:4]).ToMask().ToBits(); v != want4 { + t.Errorf("want %b, got %b", want4, v) + } + if v := archsimd.LoadInt32x8Slice(int32s[:8]).ToMask().ToBits(); v != want8 { + t.Errorf("want %b, got %b", want8, v) + } + if v := archsimd.LoadInt64x2Slice(int64s[:2]).ToMask().ToBits(); v != want2 { + t.Errorf("want %b, got %b", want2, v) + } + if v := archsimd.LoadInt64x4Slice(int64s[:4]).ToMask().ToBits(); v != want4 { + t.Errorf("want %b, got %b", want4, v) } - if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { - t.Errorf("Want 0b101, got %b", v) + + if archsimd.X86.AVX2() { + if v := archsimd.LoadInt8x32Slice(int8s[:32]).ToMask().ToBits(); v != want32 { + 
t.Errorf("want %b, got %b", want32, v) + } + } + + if archsimd.X86.AVX512() { + if v := archsimd.LoadInt8x64Slice(int8s).ToMask().ToBits(); v != want64 { + t.Errorf("want %b, got %b", want64, v) + } + if v := archsimd.LoadInt16x8Slice(int16s[:8]).ToMask().ToBits(); v != want8 { + t.Errorf("want %b, got %b", want8, v) + } + if v := archsimd.LoadInt16x16Slice(int16s[:16]).ToMask().ToBits(); v != want16 { + t.Errorf("want %b, got %b", want16, v) + } + if v := archsimd.LoadInt16x32Slice(int16s).ToMask().ToBits(); v != want32 { + t.Errorf("want %b, got %b", want32, v) + } + if v := archsimd.LoadInt32x16Slice(int32s).ToMask().ToBits(); v != want16 { + t.Errorf("want %b, got %b", want16, v) + } + if v := archsimd.LoadInt64x8Slice(int64s).ToMask().ToBits(); v != want8 { + t.Errorf("want %b, got %b", want8, v) + } } } @@ -385,6 +469,10 @@ func TestBitMaskToBitsStore(t *testing.T) { } func TestMergeFloat(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } k := make([]int64, 4, 4) s := make([]float64, 4, 4) @@ -472,6 +560,10 @@ func TestBroadcastUint16x8(t *testing.T) { } func TestBroadcastInt8x32(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } s := make([]int8, 32, 32) archsimd.BroadcastInt8x32(-123).StoreSlice(s) checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123, @@ -1105,6 +1197,10 @@ func TestSelectTernOptInt32x16(t *testing.T) { } func TestMaskedMerge(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1}) z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4}) @@ -1123,40 +1219,6 @@ func TestMaskedMerge(t *testing.T) { } } -func TestDotProductQuadruple(t *testing.T) { - if !archsimd.X86.AVXVNNI() { - t.Skip("Test requires X86.AVXVNNI, 
not available on this hardware") - return - } - xd := make([]int8, 16) - yd := make([]uint8, 16) - zd := make([]int32, 4) - wanted1 := make([]int32, 4) - wanted2 := make([]int32, 4) - res1 := make([]int32, 4) - res2 := make([]int32, 4) - for i := range 4 { - xd[i] = 5 - yd[i] = 6 - zd[i] = 3 - wanted1[i] = 30 - wanted2[i] = 30 - } - x := archsimd.LoadInt8x16Slice(xd) - y := archsimd.LoadUint8x16Slice(yd) - z := archsimd.LoadInt32x4Slice(zd) - x.DotProductQuadruple(y).StoreSlice(res1) - x.DotProductQuadruple(y).Add(z).StoreSlice(res1) - for i := range 4 { - if res1[i] != wanted1[i] { - t.Errorf("got %d wanted %d", res1[i], wanted1[i]) - } - if res2[i] != wanted2[i] { - t.Errorf("got %d wanted %d", res2[i], wanted2[i]) - } - } -} - func TestPermuteScalars(t *testing.T) { x := []int32{11, 12, 13, 14} want := []int32{12, 13, 14, 11} @@ -1166,6 +1228,10 @@ func TestPermuteScalars(t *testing.T) { } func TestPermuteScalarsGrouped(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } x := []int32{11, 12, 13, 14, 21, 22, 23, 24} want := []int32{12, 13, 14, 11, 22, 23, 24, 21} got := make([]int32, 8) @@ -1190,6 +1256,10 @@ func TestPermuteScalarsLo(t *testing.T) { } func TestPermuteScalarsHiGrouped(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114} want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111} got := make([]int16, len(x)) @@ -1198,6 +1268,10 @@ func TestPermuteScalarsHiGrouped(t *testing.T) { } func TestPermuteScalarsLoGrouped(t *testing.T) { + if !archsimd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17} want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17} got := 
make([]int16, len(x)) @@ -1222,3 +1296,147 @@ func TestClMul(t *testing.T) { foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0}) } + +func addPairsSlice[T number](a, b []T) []T { + r := make([]T, len(a)) + for i := range len(a) / 2 { + r[i] = a[2*i] + a[2*i+1] + r[i+len(a)/2] = b[2*i] + b[2*i+1] + } + return r +} + +func subPairsSlice[T number](a, b []T) []T { + r := make([]T, len(a)) + for i := range len(a) / 2 { + r[i] = a[2*i] - a[2*i+1] + r[i+len(a)/2] = b[2*i] - b[2*i+1] + } + return r +} + +func addPairsGroupedSlice[T number](a, b []T) []T { + group := int(128 / unsafe.Sizeof(a[0])) + r := make([]T, 0, len(a)) + for i := range len(a) / group { + r = append(r, addPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...) + } + return r +} + +func subPairsGroupedSlice[T number](a, b []T) []T { + group := int(128 / unsafe.Sizeof(a[0])) + r := make([]T, 0, len(a)) + for i := range len(a) / group { + r = append(r, subPairsSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group])...) + } + return r +} + +func TestAddSubPairs(t *testing.T) { + testInt16x8Binary(t, archsimd.Int16x8.AddPairs, addPairsSlice[int16]) + testInt16x8Binary(t, archsimd.Int16x8.SubPairs, subPairsSlice[int16]) + testUint16x8Binary(t, archsimd.Uint16x8.AddPairs, addPairsSlice[uint16]) + testUint16x8Binary(t, archsimd.Uint16x8.SubPairs, subPairsSlice[uint16]) + testInt32x4Binary(t, archsimd.Int32x4.AddPairs, addPairsSlice[int32]) + testInt32x4Binary(t, archsimd.Int32x4.SubPairs, subPairsSlice[int32]) + testUint32x4Binary(t, archsimd.Uint32x4.AddPairs, addPairsSlice[uint32]) + testUint32x4Binary(t, archsimd.Uint32x4.SubPairs, subPairsSlice[uint32]) + testFloat32x4Binary(t, archsimd.Float32x4.AddPairs, addPairsSlice[float32]) + testFloat32x4Binary(t, archsimd.Float32x4.SubPairs, subPairsSlice[float32]) + testFloat64x2Binary(t, archsimd.Float64x2.AddPairs, addPairsSlice[float64]) + testFloat64x2Binary(t, archsimd.Float64x2.SubPairs, subPairsSlice[float64]) + + // Grouped versions + if 
archsimd.X86.AVX2() { + testInt16x16Binary(t, archsimd.Int16x16.AddPairsGrouped, addPairsGroupedSlice[int16]) + testInt16x16Binary(t, archsimd.Int16x16.SubPairsGrouped, subPairsGroupedSlice[int16]) + testUint16x16Binary(t, archsimd.Uint16x16.AddPairsGrouped, addPairsGroupedSlice[uint16]) + testUint16x16Binary(t, archsimd.Uint16x16.SubPairsGrouped, subPairsGroupedSlice[uint16]) + testInt32x8Binary(t, archsimd.Int32x8.AddPairsGrouped, addPairsGroupedSlice[int32]) + testInt32x8Binary(t, archsimd.Int32x8.SubPairsGrouped, subPairsGroupedSlice[int32]) + testUint32x8Binary(t, archsimd.Uint32x8.AddPairsGrouped, addPairsGroupedSlice[uint32]) + testUint32x8Binary(t, archsimd.Uint32x8.SubPairsGrouped, subPairsGroupedSlice[uint32]) + testFloat32x8Binary(t, archsimd.Float32x8.AddPairsGrouped, addPairsGroupedSlice[float32]) + testFloat32x8Binary(t, archsimd.Float32x8.SubPairsGrouped, subPairsGroupedSlice[float32]) + testFloat64x4Binary(t, archsimd.Float64x4.AddPairsGrouped, addPairsGroupedSlice[float64]) + testFloat64x4Binary(t, archsimd.Float64x4.SubPairsGrouped, subPairsGroupedSlice[float64]) + } +} + +func convConcatSlice[T, U number](a, b []T, conv func(T) U) []U { + r := make([]U, len(a)+len(b)) + for i, v := range a { + r[i] = conv(v) + } + for i, v := range b { + r[len(a)+i] = conv(v) + } + return r +} + +func convConcatGroupedSlice[T, U number](a, b []T, conv func(T) U) []U { + group := int(128 / unsafe.Sizeof(a[0])) + r := make([]U, 0, len(a)+len(b)) + for i := 0; i < len(a)/group; i++ { + r = append(r, convConcatSlice(a[i*group:(i+1)*group], b[i*group:(i+1)*group], conv)...) 
+ } + return r +} + +func TestSaturateConcat(t *testing.T) { + // Int32x4.SaturateToInt16Concat + forSlicePair(t, int32s, 4, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y) + var out [8]int16 + a.SaturateToInt16Concat(b).Store(&out) + want := convConcatSlice(x, y, satToInt16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + // Int32x4.SaturateToUint16Concat + forSlicePair(t, int32s, 4, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x4Slice(x), archsimd.LoadInt32x4Slice(y) + var out [8]uint16 + a.SaturateToUint16Concat(b).Store(&out) + want := convConcatSlice(x, y, satToUint16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + + if archsimd.X86.AVX2() { + // Int32x8.SaturateToInt16ConcatGrouped + forSlicePair(t, int32s, 8, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y) + var out [16]int16 + a.SaturateToInt16ConcatGrouped(b).Store(&out) + want := convConcatGroupedSlice(x, y, satToInt16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + // Int32x8.SaturateToUint16ConcatGrouped + forSlicePair(t, int32s, 8, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x8Slice(x), archsimd.LoadInt32x8Slice(y) + var out [16]uint16 + a.SaturateToUint16ConcatGrouped(b).Store(&out) + want := convConcatGroupedSlice(x, y, satToUint16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + } + + if archsimd.X86.AVX512() { + // Int32x16.SaturateToInt16ConcatGrouped + forSlicePair(t, int32s, 16, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y) + var out [32]int16 + a.SaturateToInt16ConcatGrouped(b).Store(&out) + want := convConcatGroupedSlice(x, y, satToInt16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + // 
Int32x16.SaturateToUint16ConcatGrouped + forSlicePair(t, int32s, 16, func(x, y []int32) bool { + a, b := archsimd.LoadInt32x16Slice(x), archsimd.LoadInt32x16Slice(y) + var out [32]uint16 + a.SaturateToUint16ConcatGrouped(b).Store(&out) + want := convConcatGroupedSlice(x, y, satToUint16) + return checkSlicesLogInput(t, out[:], want, 0, func() { t.Logf("x=%v, y=%v", x, y) }) + }) + } +} diff --git a/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go b/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go index 2f040ffb3e..ac60b6d377 100644 --- a/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/simulation_helpers_test.go @@ -29,6 +29,10 @@ func notEqual[T number](x, y T) bool { return x != y } +func isNaN[T float](x T) bool { + return x != x +} + func abs[T number](x T) T { // TODO this will need a non-standard FP-equality test. if x == 0 { // true if x is -0. @@ -121,16 +125,6 @@ func toUint64[T number](x T) uint64 { } func toUint32[T number](x T) uint32 { - switch y := (any(x)).(type) { - case float32: - if y < 0 || y > float32(math.MaxUint32) || y != y { - return math.MaxUint32 - } - case float64: - if y < 0 || y > float64(math.MaxUint32) || y != y { - return math.MaxUint32 - } - } return uint32(x) } @@ -158,6 +152,74 @@ func toFloat64[T number](x T) float64 { return float64(x) } +// X86 specific behavior for conversion from float to int32. +// If the value cannot be represented as int32, it returns -0x80000000. +func floatToInt32_x86[T float](x T) int32 { + switch y := (any(x)).(type) { + case float32: + if y != y || y < math.MinInt32 || + y >= math.MaxInt32 { // float32(MaxInt32) == 0x80000000, actually overflows + return -0x80000000 + } + case float64: + if y != y || y < math.MinInt32 || + y > math.MaxInt32 { // float64(MaxInt32) is exact, no overflow + return -0x80000000 + } + } + return int32(x) +} + +// X86 specific behavior for conversion from float to int64. 
+// If the value cannot be represented as int64, it returns -0x80000000_00000000. +func floatToInt64_x86[T float](x T) int64 { + switch y := (any(x)).(type) { + case float32: + if y != y || y < math.MinInt64 || + y >= math.MaxInt64 { // float32(MaxInt64) == 0x80000000_00000000, actually overflows + return -0x80000000_00000000 + } + case float64: + if y != y || y < math.MinInt64 || + y >= math.MaxInt64 { // float64(MaxInt64) == 0x80000000_00000000, also overflows + return -0x80000000_00000000 + } + } + return int64(x) +} + +// X86 specific behavior for conversion from float to uint32. +// If the value cannot be represented as uint32, it returns 1<<32 - 1. +func floatToUint32_x86[T float](x T) uint32 { + switch y := (any(x)).(type) { + case float32: + if y < 0 || y > math.MaxUint32 || y != y { + return 1<<32 - 1 + } + case float64: + if y < 0 || y > math.MaxUint32 || y != y { + return 1<<32 - 1 + } + } + return uint32(x) +} + +// X86 specific behavior for conversion from float to uint64. +// If the value cannot be represented as uint64, it returns 1<<64 - 1. 
+func floatToUint64_x86[T float](x T) uint64 { + switch y := (any(x)).(type) { + case float32: + if y < 0 || y > math.MaxUint64 || y != y { + return 1<<64 - 1 + } + case float64: + if y < 0 || y > math.MaxUint64 || y != y { + return 1<<64 - 1 + } + } + return uint64(x) +} + func ceilResidueForPrecision[T float](i int) func(T) T { f := 1.0 for i > 0 { @@ -241,6 +303,15 @@ func notEqualSlice[T number](x, y []T) []int64 { return mapCompare[T](notEqual)(x, y) } +func isNaNSlice[T float](x []T) []int64 { + return map1[T](func(x T) int64 { + if isNaN(x) { + return -1 + } + return 0 + })(x) +} + func ceilSlice[T float](x []T) []T { return map1[T](ceil)(x) } @@ -272,3 +343,90 @@ func imaSlice[T integer](x, y, z []T) []T { func fmaSlice[T float](x, y, z []T) []T { return map3[T](fma)(x, y, z) } + +func satToInt8[T integer](x T) int8 { + var m int8 = -128 + var M int8 = 127 + if T(M) < T(m) { // expecting T being a larger type + panic("bad input type") + } + if x < T(m) { + return m + } + if x > T(M) { + return M + } + return int8(x) +} + +func satToUint8[T integer](x T) uint8 { + var M uint8 = 255 + if T(M) < 0 { // expecting T being a larger type + panic("bad input type") + } + if x < 0 { + return 0 + } + if x > T(M) { + return M + } + return uint8(x) +} + +func satToInt16[T integer](x T) int16 { + var m int16 = -32768 + var M int16 = 32767 + if T(M) < T(m) { // expecting T being a larger type + panic("bad input type") + } + if x < T(m) { + return m + } + if x > T(M) { + return M + } + return int16(x) +} + +func satToUint16[T integer](x T) uint16 { + var M uint16 = 65535 + if T(M) < 0 { // expecting T being a larger type + panic("bad input type") + } + if x < 0 { + return 0 + } + if x > T(M) { + return M + } + return uint16(x) +} + +func satToInt32[T integer](x T) int32 { + var m int32 = -1 << 31 + var M int32 = 1<<31 - 1 + if T(M) < T(m) { // expecting T being a larger type + panic("bad input type") + } + if x < T(m) { + return m + } + if x > T(M) { + return M + } + 
return int32(x) +} + +func satToUint32[T integer](x T) uint32 { + var M uint32 = 1<<32 - 1 + if T(M) < 0 { // expecting T being a larger type + panic("bad input type") + } + if x < 0 { + return 0 + } + if x > T(M) { + return M + } + return uint32(x) +} diff --git a/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go index c37f9ef0ca..2e25010890 100644 --- a/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/ternary_helpers_test.go @@ -1,6 +1,6 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing ternary simd methods. // Each function in this file is specialized for a diff --git a/src/simd/archsimd/internal/simd_test/unary_helpers_test.go b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go index e2610ad98b..5d14c4ff05 100644 --- a/src/simd/archsimd/internal/simd_test/unary_helpers_test.go +++ b/src/simd/archsimd/internal/simd_test/unary_helpers_test.go @@ -1,6 +1,6 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. -//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 // This file contains functions testing unary simd methods. // Each function in this file is specialized for a @@ -433,915 +433,8400 @@ func testFloat64x8Unary(t *testing.T, f func(_ archsimd.Float64x8) archsimd.Floa }) } -// testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt8x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToInt8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int8x16, want func(x []int8) []int8) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToInt8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int8x16, want func(x []int16) []int8) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToInt8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int8x16, want func(x []int32) []int8) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToInt8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int8x16, want func(x []int64) []int8) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int8x16, want func(x []uint8) []int8) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int8x16, want func(x []uint16) []int8) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int8x16, want func(x []uint32) []int8) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int8x16, want func(x []uint64) []int8) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToInt8(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int8x16, want func(x []float32) []int8) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToInt8(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int8x16, want func(x []float64) []int8) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToInt8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int8x32, want func(x []int8) []int8) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToInt8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int8x16, want func(x []int16) []int8) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToInt8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int8x16, want func(x []int32) []int8) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToInt8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int8x16, want func(x []int64) []int8) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int8x32, want func(x []uint8) []int8) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int8x16, want func(x []uint16) []int8) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int8x16, want func(x []uint32) []int8) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int8x16, want func(x []uint64) []int8) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToInt8(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int8x16, want func(x []float32) []int8) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToInt8(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int8x16, want func(x []float64) []int8) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToInt8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int8x64, want func(x []int8) []int8) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int8, 64) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToInt8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int8x32, want func(x []int16) []int8) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToInt8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int8x16, want func(x []int32) []int8) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToInt8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int8x16, want func(x []int64) []int8) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToInt8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int8x64, want func(x []uint8) []int8) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int8, 64) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToInt8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int8x32, want func(x []uint16) []int8) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToInt8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int8x16, want func(x []uint32) []int8) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToInt8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int8x16, want func(x []uint64) []int8) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToInt8(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int8x16, want func(x []float32) []int8) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToInt8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToInt8(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int8x16, want func(x []float64) []int8) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]int8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToUint8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint8x16, want func(x []int8) []uint8) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToUint8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint8x16, want func(x []int16) []uint8) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToUint8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint8x16, want func(x []int32) []uint8) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToUint8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint8x16, want func(x []int64) []uint8) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint8x16, want func(x []uint8) []uint8) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint8x16, want func(x []uint16) []uint8) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint8x16, want func(x []uint32) []uint8) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint8x16, want func(x []uint64) []uint8) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToUint8(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint8x16, want func(x []float32) []uint8) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToUint8(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint8x16, want func(x []float64) []uint8) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToUint8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint8x32, want func(x []int8) []uint8) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToUint8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint8x16, want func(x []int16) []uint8) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToUint8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint8x16, want func(x []int32) []uint8) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToUint8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint8x16, want func(x []int64) []uint8) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint8x32, want func(x []uint8) []uint8) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint8x16, want func(x []uint16) []uint8) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint8x16, want func(x []uint32) []uint8) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint8x16, want func(x []uint64) []uint8) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToUint8(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint8x16, want func(x []float32) []uint8) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToUint8(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint8x16, want func(x []float64) []uint8) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToUint8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint8x64, want func(x []int8) []uint8) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint8, 64) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToUint8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint8x32, want func(x []int16) []uint8) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToUint8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint8x16, want func(x []int32) []uint8) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToUint8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint8x16, want func(x []int64) []uint8) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToUint8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint8x64, want func(x []uint8) []uint8) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint8, 64) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToUint8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint8x32, want func(x []uint16) []uint8) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint8, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToUint8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint8x16, want func(x []uint32) []uint8) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToUint8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint8x16, want func(x []uint64) []uint8) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToUint8(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint8x16, want func(x []float32) []uint8) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToUint8 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToUint8(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint8x16, want func(x []float64) []uint8) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]uint8, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToInt16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int16x16, want func(x []int8) []int16) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToInt16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int16x8, want func(x []int16) []int16) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToInt16(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int16x8, want func(x []int32) []int16) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToInt16(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int16x8, want func(x []int64) []int16) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int16x16, want func(x []uint8) []int16) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int16x8, want func(x []uint16) []int16) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int16x8, want func(x []uint32) []int16) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToInt16(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int16x8, want func(x []float32) []int16) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToInt16(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int16x8, want func(x []float64) []int16) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToInt16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int16x32, want func(x []int8) []int16) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToInt16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int16x16, want func(x []int16) []int16) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToInt16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int16x8, want func(x []int32) []int16) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToInt16(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int16x8, want func(x []int64) []int16) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int16x32, want func(x []uint8) []int16) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int16x16, want func(x []uint16) []int16) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int16x8, want func(x []uint32) []int16) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToInt16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int16x8, want func(x []float32) []int16) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToInt16(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int16x8, want func(x []float64) []int16) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToInt16(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int16x32, want func(x []int8) []int16) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToInt16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int16x32, want func(x []int16) []int16) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToInt16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int16x16, want func(x []int32) []int16) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToInt16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int16x8, want func(x []int64) []int16) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToInt16(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int16x32, want func(x []uint8) []int16) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToInt16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int16x32, want func(x []uint16) []int16) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToInt16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int16x16, want func(x []uint32) []int16) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToInt16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToInt16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int16x16, want func(x []float32) []int16) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]int16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToInt16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToInt16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int16x8, want func(x []float64) []int16) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToUint16(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint16x8, want func(x []int32) []uint16) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToUint16(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint16x8, want func(x []uint32) []uint16) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToUint16(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint16x8, want func(x []float32) []uint16) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToUint16(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint16x8, want func(x []float64) []uint16) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToUint16(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToUint16(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint16x8, want func(x []float64) []uint16) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToUint16(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint16x32, want func(x []int8) []uint16) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint16x32, want func(x []uint8) []uint16) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint16, 32) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]uint16, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt8x16ConvertToInt32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x16, want func(x []int8) []int32) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() a := archsimd.LoadInt8x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt16x8ConvertToInt32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt32x4ConvertToInt32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) { n := 4 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToInt32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x4, want func(x []int64) []int32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint8x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x16, want func(x []uint8) []int32) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() a := archsimd.LoadUint8x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint16x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint16x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) { n := 4 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint64x2ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x4, want func(x []uint64) []int32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat32x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x4ConvertToInt32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int32x4, want func(x []float32) []int32) { n := 4 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToInt32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int32x4, want func(x []float64) []int32) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x32ConvertToInt32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x16, want func(x []int8) []int32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt16x16ConvertToInt32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x16, want func(x []int16) []int32) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt32x8ConvertToInt32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt64x4ConvertToInt32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) { n := 4 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() a := archsimd.LoadInt64x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint8x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x32ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x16, want func(x []uint8) []int32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint16x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x16, want func(x []uint16) []int32) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint64x4ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) { n := 4 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() a := archsimd.LoadUint64x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat32x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x8ConvertToInt32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int32x8, want func(x []float32) []int32) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat64x4ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testFloat64x4ConvertToInt32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int32x4, want func(x []float64) []int32) { n := 4 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() a := archsimd.LoadFloat64x4Slice(x) - g := make([]int32, n) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x64ConvertToInt32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x16, want func(x []int8) []int32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToInt32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x16, want func(x []int16) []int32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt32x16ConvertToInt32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x16, want func(x []int32) []int32) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt64x8ConvertToInt32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() a := archsimd.LoadInt64x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x64ConvertToInt32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x16, want func(x []uint8) []int32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToInt32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x16, want func(x []uint16) []int32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x16ConvertToInt32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x16, want func(x []uint32) []int32) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint64x8ConvertToInt32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() a := archsimd.LoadUint64x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat32x16ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x16ConvertToInt32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int32x16, want func(x []float32) []int32) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x16Slice(x) - g := make([]int32, n) + g := make([]int32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat64x8ConvertToInt32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testFloat64x8ConvertToInt32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int32x8, want func(x []float64) []int32) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() a := archsimd.LoadFloat64x8Slice(x) - g := make([]int32, n) + g := make([]int32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt8x16ConvertToUint32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x16, want func(x []int8) []uint32) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() a := archsimd.LoadInt8x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt16x8ConvertToUint32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt32x4ConvertToUint32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) { n := 4 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToUint32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x4, want func(x []int64) []uint32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint8x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint8x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x16, want func(x []uint8) []uint32) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() a := archsimd.LoadUint8x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint16x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint16x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) { n := 4 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint64x2ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x4, want func(x []uint64) []uint32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x4ConvertToUint32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint32x4, want func(x []float32) []uint32) { n := 4 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToUint32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint32x4, want func(x []float64) []uint32) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x32ConvertToUint32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x16, want func(x []int8) []uint32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt16x16ConvertToUint32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x16, want func(x []int16) []uint32) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt32x8ConvertToUint32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt64x4ConvertToUint32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) { n := 4 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() a := archsimd.LoadInt64x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint8x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x32ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x16, want func(x []uint8) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint16x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x16, want func(x []uint16) []uint32) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint64x4ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) { n := 4 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() a := archsimd.LoadUint64x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat32x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x8ConvertToUint32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint32x8, want func(x []float32) []uint32) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat64x4ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testFloat64x4ConvertToUint32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint32x4, want func(x []float64) []uint32) { n := 4 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() a := archsimd.LoadFloat64x4Slice(x) - g := make([]uint32, n) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x64ConvertToUint32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x16, want func(x []int8) []uint32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt16x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToUint32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x16, want func(x []int16) []uint32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testInt32x16ConvertToUint32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x16, want func(x []int32) []uint32) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testInt64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testInt64x8ConvertToUint32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() a := archsimd.LoadInt64x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x64ConvertToUint32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x16, want func(x []uint8) []uint32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToUint32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x16, want func(x []uint16) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testUint32x16ConvertToUint32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x16, want func(x []uint32) []uint32) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testUint64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testUint64x8ConvertToUint32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() a := archsimd.LoadUint64x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat32x16ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
func testFloat32x16ConvertToUint32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint32x16, want func(x []float32) []uint32) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x16Slice(x) - g := make([]uint32, n) + g := make([]uint32, 16) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. +// testFloat64x8ConvertToUint32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). func testFloat64x8ConvertToUint32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint32x8, want func(x []float64) []uint32) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() a := archsimd.LoadFloat64x8Slice(x) - g := make([]uint32, n) + g := make([]uint32, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt8x16ConvertToUint16(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x16, want func(x []int8) []uint16) { +// testInt8x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToInt64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x8, want func(x []int8) []int64) { n := 16 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() a := archsimd.LoadInt8x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x8ConvertToUint16(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) { +// testInt16x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToInt64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x8, want func(x []int16) []int64) { n := 8 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint8x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. 
-func testUint8x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x16, want func(x []uint8) []uint16) { +// testInt32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt32x4ConvertToInt64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x4, want func(x []int32) []int64) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToInt64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x2, want func(x []int64) []int64) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x8, want func(x []uint8) []int64) { n := 16 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() a := archsimd.LoadUint8x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) { +// testUint16x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x8, want func(x []uint16) []int64) { n := 8 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt8x32ConvertToUint16(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x32, want func(x []int8) []uint16) { +// testUint32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x4, want func(x []uint32) []int64) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint64x2ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x2, want func(x []uint64) []int64) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testFloat32x4ConvertToInt64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Int64x4, want func(x []float32) []int64) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToInt64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Int64x2, want func(x []float64) []int64) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x32ConvertToInt64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x8, want func(x []int8) []int64) { n := 32 t.Helper() forSlice(t, int8s, n, func(x []int8) bool { t.Helper() a := archsimd.LoadInt8x32Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x16ConvertToUint16(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x16, want func(x []int16) []uint16) { +// testInt16x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToInt64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x8, want func(x []int16) []int64) { n := 16 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x8ConvertToUint16(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) { +// testInt32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt32x8ConvertToInt64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x8, want func(x []int32) []int64) { n := 8 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint8x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint8x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x32, want func(x []uint8) []uint16) { +// testInt64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToInt64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x4, want func(x []int64) []int64) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint8x32ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x8, want func(x []uint8) []int64) { n := 32 t.Helper() forSlice(t, uint8s, n, func(x []uint8) bool { t.Helper() a := archsimd.LoadUint8x32Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x16, want func(x []uint16) []uint16) { +// testUint16x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x8, want func(x []uint16) []int64) { n := 16 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) { +// testUint32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x8, want func(x []uint32) []int64) { n := 8 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x8ConvertToUint16(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint16x8, want func(x []float32) []uint16) { +// testUint64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint64x4ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x4, want func(x []uint64) []int64) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testFloat32x8ConvertToInt64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Int64x8, want func(x []float32) []int64) { n := 8 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt16x32ConvertToUint16(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x32, want func(x []int16) []uint16) { +// testFloat64x4ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToInt64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Int64x4, want func(x []float64) []int64) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt8x64ConvertToInt64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x8, want func(x []int8) []int64) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToInt64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x8, want func(x []int16) []int64) { n := 32 t.Helper() forSlice(t, int16s, n, func(x []int16) bool { t.Helper() a := archsimd.LoadInt16x32Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt32x16ConvertToUint16(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x16, want func(x []int32) []uint16) { +// testInt32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToInt64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x8, want func(x []int32) []int64) { n := 16 t.Helper() forSlice(t, int32s, n, func(x []int32) bool { t.Helper() a := archsimd.LoadInt32x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testInt64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testInt64x8ConvertToUint16(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) { +// testInt64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToInt64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x8, want func(x []int64) []int64) { n := 8 t.Helper() forSlice(t, int64s, n, func(x []int64) bool { t.Helper() a := archsimd.LoadInt64x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint16x32ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint16x32ConvertToUint16(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x32, want func(x []uint16) []uint16) { +// testUint8x64ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToInt64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x8, want func(x []uint8) []int64) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToInt64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x8, want func(x []uint16) []int64) { n := 32 t.Helper() forSlice(t, uint16s, n, func(x []uint16) bool { t.Helper() a := archsimd.LoadUint16x32Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint32x16ConvertToUint16(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x16, want func(x []uint32) []uint16) { +// testUint32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint32x16ConvertToInt64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x8, want func(x []uint32) []int64) { n := 16 t.Helper() forSlice(t, uint32s, n, func(x []uint32) bool { t.Helper() a := archsimd.LoadUint32x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testUint64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testUint64x8ConvertToUint16(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) { +// testUint64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint64x8ConvertToInt64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x8, want func(x []uint64) []int64) { n := 8 t.Helper() forSlice(t, uint64s, n, func(x []uint64) bool { t.Helper() a := archsimd.LoadUint64x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat32x16ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat32x16ConvertToUint16(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint16x16, want func(x []float32) []uint16) { +// testFloat32x16ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToInt64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Int64x8, want func(x []float32) []int64) { n := 16 t.Helper() forSlice(t, float32s, n, func(x []float32) bool { t.Helper() a := archsimd.LoadFloat32x16Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) }) } -// testFloat64x8ConvertToUint16 tests the simd conversion method f against the expected behavior generated by want -// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width. -func testFloat64x8ConvertToUint16(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint16x8, want func(x []float64) []uint16) { +// testFloat64x8ConvertToInt64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToInt64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Int64x8, want func(x []float64) []int64) { n := 8 t.Helper() forSlice(t, float64s, n, func(x []float64) bool { t.Helper() a := archsimd.LoadFloat64x8Slice(x) - g := make([]uint16, n) + g := make([]int64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToUint64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x8, want func(x []int8) []uint64) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToUint64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x8, want func(x []int16) []uint64) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToUint64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x4, want func(x []int32) []uint64) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToUint64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x2, want func(x []int64) []uint64) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x8, want func(x []uint8) []uint64) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x8, want func(x []uint16) []uint64) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x4, want func(x []uint32) []uint64) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x2, want func(x []uint64) []uint64) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToUint64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Uint64x4, want func(x []float32) []uint64) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToUint64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Uint64x2, want func(x []float64) []uint64) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]uint64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToUint64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x8, want func(x []int8) []uint64) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToUint64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x8, want func(x []int16) []uint64) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToUint64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x8, want func(x []int32) []uint64) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToUint64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x4, want func(x []int64) []uint64) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x8, want func(x []uint8) []uint64) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x8, want func(x []uint16) []uint64) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x8, want func(x []uint32) []uint64) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x4, want func(x []uint64) []uint64) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToUint64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Uint64x8, want func(x []float32) []uint64) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToUint64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Uint64x4, want func(x []float64) []uint64) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]uint64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToUint64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x8, want func(x []int8) []uint64) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToUint64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x8, want func(x []int16) []uint64) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToUint64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x8, want func(x []int32) []uint64) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToUint64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x8, want func(x []int64) []uint64) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToUint64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x8, want func(x []uint8) []uint64) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToUint64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x8, want func(x []uint16) []uint64) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToUint64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x8, want func(x []uint32) []uint64) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToUint64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x8, want func(x []uint64) []uint64) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToUint64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Uint64x8, want func(x []float32) []uint64) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToUint64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToUint64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Uint64x8, want func(x []float64) []uint64) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]uint64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x16) archsimd.Float32x16, want func(x []int8) []float32) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x8) archsimd.Float32x8, want func(x []int16) []float32) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x4) archsimd.Float32x4, want func(x []int32) []float32) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x2) archsimd.Float32x4, want func(x []int64) []float32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Float32x16, want func(x []uint8) []float32) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Float32x8, want func(x []uint16) []float32) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Float32x4, want func(x []uint32) []float32) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Float32x4, want func(x []uint64) []float32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float32x4, want func(x []float32) []float32) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float32x4, want func(x []float64) []float32) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x32) archsimd.Float32x16, want func(x []int8) []float32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x16) archsimd.Float32x16, want func(x []int16) []float32) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x8) archsimd.Float32x8, want func(x []int32) []float32) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x4) archsimd.Float32x4, want func(x []int64) []float32) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Float32x16, want func(x []uint8) []float32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Float32x16, want func(x []uint16) []float32) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Float32x8, want func(x []uint32) []float32) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Float32x4, want func(x []uint64) []float32) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float32x8, want func(x []float32) []float32) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float32x4, want func(x []float64) []float32) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]float32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToFloat32(t *testing.T, f func(x archsimd.Int8x64) archsimd.Float32x16, want func(x []int8) []float32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToFloat32(t *testing.T, f func(x archsimd.Int16x32) archsimd.Float32x16, want func(x []int16) []float32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Int32x16) archsimd.Float32x16, want func(x []int32) []float32) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Int64x8) archsimd.Float32x8, want func(x []int64) []float32) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToFloat32(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Float32x16, want func(x []uint8) []float32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToFloat32(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Float32x16, want func(x []uint16) []float32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Float32x16, want func(x []uint32) []float32) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Float32x8, want func(x []uint64) []float32) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToFloat32(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float32x16, want func(x []float32) []float32) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]float32, 16) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToFloat32 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToFloat32(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float32x8, want func(x []float64) []float32) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]float32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x16) archsimd.Float64x8, want func(x []int8) []float64) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x8) archsimd.Float64x8, want func(x []int16) []float64) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x4) archsimd.Float64x4, want func(x []int32) []float64) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x2) archsimd.Float64x2, want func(x []int64) []float64) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]float64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Float64x8, want func(x []uint8) []float64) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Float64x8, want func(x []uint16) []float64) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Float64x4, want func(x []uint32) []float64) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Float64x2, want func(x []uint64) []float64) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]float64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x4ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x4) archsimd.Float64x4, want func(x []float32) []float64) { + n := 4 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x2ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x2ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x2) archsimd.Float64x2, want func(x []float64) []float64) { + n := 2 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x2Slice(x) + g := make([]float64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x32ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x32) archsimd.Float64x8, want func(x []int8) []float64) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x16) archsimd.Float64x8, want func(x []int16) []float64) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x8) archsimd.Float64x8, want func(x []int32) []float64) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x4) archsimd.Float64x4, want func(x []int64) []float64) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x32ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Float64x8, want func(x []uint8) []float64) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Float64x8, want func(x []uint16) []float64) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Float64x8, want func(x []uint32) []float64) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Float64x4, want func(x []uint64) []float64) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x8ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x8) archsimd.Float64x8, want func(x []float32) []float64) { + n := 8 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x4ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x4ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x4) archsimd.Float64x4, want func(x []float64) []float64) { + n := 4 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x4Slice(x) + g := make([]float64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt8x64ConvertToFloat64(t *testing.T, f func(x archsimd.Int8x64) archsimd.Float64x8, want func(x []int8) []float64) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt16x32ConvertToFloat64(t *testing.T, f func(x archsimd.Int16x32) archsimd.Float64x8, want func(x []int16) []float64) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testInt32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Int32x16) archsimd.Float64x8, want func(x []int32) []float64) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testInt64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Int64x8) archsimd.Float64x8, want func(x []int64) []float64) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint8x64ConvertToFloat64(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Float64x8, want func(x []uint8) []float64) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. 
+// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint16x32ConvertToFloat64(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Float64x8, want func(x []uint16) []float64) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testUint32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Float64x8, want func(x []uint32) []float64) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). 
+func testUint64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Float64x8, want func(x []uint64) []float64) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat32x16ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat32x16ConvertToFloat64(t *testing.T, f func(x archsimd.Float32x16) archsimd.Float64x8, want func(x []float32) []float64) { + n := 16 + t.Helper() + forSlice(t, float32s, n, func(x []float32) bool { + t.Helper() + a := archsimd.LoadFloat32x16Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testFloat64x8ConvertToFloat64 tests the simd conversion method f against the expected behavior generated by want. +// This is for count-preserving conversions, so if there is a change in size, then there is a change in vector width, +// (extended to at least 128 bits, or truncated to at most 512 bits). +func testFloat64x8ConvertToFloat64(t *testing.T, f func(x archsimd.Float64x8) archsimd.Float64x8, want func(x []float64) []float64) { + n := 8 + t.Helper() + forSlice(t, float64s, n, func(x []float64) bool { + t.Helper() + a := archsimd.LoadFloat64x8Slice(x) + g := make([]float64, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. 
+// This converts only the low 2 elements. +func testInt8x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x2, want func(x []int8) []int64) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt16x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x2, want func(x []int16) []int64) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt32x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x2, want func(x []int32) []int64) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testInt64x2ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x2, want func(x []int64) []int64) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint8x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x2, want func(x []uint8) []int64) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint16x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x2, want func(x []uint16) []int64) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testUint32x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x2, want func(x []uint32) []int64) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint64x2ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x2, want func(x []uint64) []int64) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt8x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x2, want func(x []int8) []int64) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testInt16x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x2, want func(x []int16) []int64) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt32x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x2, want func(x []int32) []int64) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt64x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x2, want func(x []int64) []int64) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testUint8x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x2, want func(x []uint8) []int64) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint16x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x2, want func(x []uint16) []int64) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint32x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x2, want func(x []uint32) []int64) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testUint64x4ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x2, want func(x []uint64) []int64) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt8x64ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x2, want func(x []int8) []int64) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt16x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x2, want func(x []int16) []int64) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testInt32x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x2, want func(x []int32) []int64) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testInt64x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x2, want func(x []int64) []int64) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint8x64ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x2, want func(x []uint8) []int64) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. 
+func testUint16x32ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x2, want func(x []uint16) []int64) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint32x16ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x2, want func(x []uint32) []int64) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToInt64x2 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 2 elements. +func testUint64x8ConvertLoToInt64x2(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x2, want func(x []uint64) []int64) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int64, 2) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt8x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int64x4, want func(x []int8) []int64) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt16x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int64x4, want func(x []int16) []int64) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int64x4, want func(x []int32) []int64) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt64x2ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int64x4, want func(x []int64) []int64) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint8x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int64x4, want func(x []uint8) []int64) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int64x4, want func(x []uint16) []int64) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint32x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int64x4, want func(x []uint32) []int64) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint64x2ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int64x4, want func(x []uint64) []int64) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt8x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int64x4, want func(x []int8) []int64) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt16x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int64x4, want func(x []int16) []int64) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int64x4, want func(x []int32) []int64) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt64x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int64x4, want func(x []int64) []int64) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint8x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int64x4, want func(x []uint8) []int64) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int64x4, want func(x []uint16) []int64) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint32x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int64x4, want func(x []uint32) []int64) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int64, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
func testUint64x4ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int64x4, want func(x []uint64) []int64) {
	n := 4
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x4Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x64ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt8x64ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int64x4, want func(x []int8) []int64) {
	n := 64
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x64Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt16x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int64x4, want func(x []int16) []int64) {
	n := 32
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x32Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt32x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int64x4, want func(x []int32) []int64) {
	n := 16
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x16Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt64x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int64x4, want func(x []int64) []int64) {
	n := 8
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x8Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x64ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint8x64ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int64x4, want func(x []uint8) []int64) {
	n := 64
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x64Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x32ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint16x32ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int64x4, want func(x []uint16) []int64) {
	n := 32
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x32Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x16ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint32x16ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int64x4, want func(x []uint32) []int64) {
	n := 16
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x16Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x8ConvertLoToInt64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint64x8ConvertLoToInt64x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int64x4, want func(x []uint64) []int64) {
	n := 8
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x8Slice(x)
		g := make([]int64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt8x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x2, want func(x []int8) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt16x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x2, want func(x []int16) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt32x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x2, want func(x []int32) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x4Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x2ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt64x2ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x2, want func(x []int64) []uint64) {
	n := 2
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x2Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint8x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x2, want func(x []uint8) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint16x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x2, want func(x []uint16) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint32x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x2, want func(x []uint32) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x4Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x2ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint64x2ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x2, want func(x []uint64) []uint64) {
	n := 2
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x2Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt8x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x2, want func(x []int8) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x32Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt16x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x2, want func(x []int16) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt32x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x2, want func(x []int32) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt64x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x2, want func(x []int64) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x4Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint8x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x2, want func(x []uint8) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x32Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint16x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x2, want func(x []uint16) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint32x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x2, want func(x []uint32) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x4ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint64x4ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x2, want func(x []uint64) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x4Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x64ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt8x64ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x2, want func(x []int8) []uint64) {
	n := 64
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x64Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt16x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x2, want func(x []int16) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x32Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt32x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x2, want func(x []int32) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testInt64x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x2, want func(x []int64) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x64ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint8x64ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x2, want func(x []uint8) []uint64) {
	n := 64
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x64Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x32ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint16x32ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x2, want func(x []uint16) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x32Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x16ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint32x16ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x2, want func(x []uint32) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x16Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x8ConvertLoToUint64x2 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 2 elements.
func testUint64x8ConvertLoToUint64x2(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x2, want func(x []uint64) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x8Slice(x)
		g := make([]uint64, 2)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt8x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint64x4, want func(x []int8) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt16x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint64x4, want func(x []int16) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt32x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint64x4, want func(x []int32) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x4Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x2ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt64x2ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint64x4, want func(x []int64) []uint64) {
	n := 2
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x2Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint8x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint64x4, want func(x []uint8) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint16x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint64x4, want func(x []uint16) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint32x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint64x4, want func(x []uint32) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x4Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x2ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint64x2ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint64x4, want func(x []uint64) []uint64) {
	n := 2
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x2Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt8x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint64x4, want func(x []int8) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x32Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt16x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint64x4, want func(x []int16) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt32x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint64x4, want func(x []int32) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt64x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint64x4, want func(x []int64) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x4Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint8x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint64x4, want func(x []uint8) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x32Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint16x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint64x4, want func(x []uint16) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint32x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint64x4, want func(x []uint32) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x4ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint64x4ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint64x4, want func(x []uint64) []uint64) {
	n := 4
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x4Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x64ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt8x64ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint64x4, want func(x []int8) []uint64) {
	n := 64
	t.Helper()
	forSlice(t, int8s, n, func(x []int8) bool {
		t.Helper()
		a := archsimd.LoadInt8x64Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt16x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt16x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint64x4, want func(x []int16) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, int16s, n, func(x []int16) bool {
		t.Helper()
		a := archsimd.LoadInt16x32Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt32x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt32x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint64x4, want func(x []int32) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, int32s, n, func(x []int32) bool {
		t.Helper()
		a := archsimd.LoadInt32x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt64x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testInt64x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint64x4, want func(x []int64) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, int64s, n, func(x []int64) bool {
		t.Helper()
		a := archsimd.LoadInt64x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint8x64ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint8x64ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint64x4, want func(x []uint8) []uint64) {
	n := 64
	t.Helper()
	forSlice(t, uint8s, n, func(x []uint8) bool {
		t.Helper()
		a := archsimd.LoadUint8x64Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint16x32ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint16x32ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint64x4, want func(x []uint16) []uint64) {
	n := 32
	t.Helper()
	forSlice(t, uint16s, n, func(x []uint16) bool {
		t.Helper()
		a := archsimd.LoadUint16x32Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint32x16ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint32x16ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint64x4, want func(x []uint32) []uint64) {
	n := 16
	t.Helper()
	forSlice(t, uint32s, n, func(x []uint32) bool {
		t.Helper()
		a := archsimd.LoadUint32x16Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testUint64x8ConvertLoToUint64x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
func testUint64x8ConvertLoToUint64x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint64x4, want func(x []uint64) []uint64) {
	n := 8
	t.Helper()
	forSlice(t, uint64s, n, func(x []uint64) bool {
		t.Helper()
		a := archsimd.LoadUint64x8Slice(x)
		g := make([]uint64, 4)
		f(a).StoreSlice(g)
		w := want(x)
		return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) })
	})
}

// testInt8x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want.
// This converts only the low 4 elements.
+func testInt8x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x4, want func(x []int8) []int32) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt16x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x4, want func(x []int16) []int32) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x4, want func(x []int32) []int32) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt64x2ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x4, want func(x []int64) []int32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint8x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x4, want func(x []uint8) []int32) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x4, want func(x []uint16) []int32) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint32x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x4, want func(x []uint32) []int32) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint64x2ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x4, want func(x []uint64) []int32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt8x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x4, want func(x []int8) []int32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt16x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x4, want func(x []int16) []int32) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x4, want func(x []int32) []int32) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt64x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x4, want func(x []int64) []int32) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint8x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x4, want func(x []uint8) []int32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x4, want func(x []uint16) []int32) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint32x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x4, want func(x []uint32) []int32) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint64x4ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x4, want func(x []uint64) []int32) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt8x64ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x4, want func(x []int8) []int32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt16x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x4, want func(x []int16) []int32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt32x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x4, want func(x []int32) []int32) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt64x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x4, want func(x []int64) []int32) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint8x64ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x4, want func(x []uint8) []int32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint16x32ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x4, want func(x []uint16) []int32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint32x16ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x4, want func(x []uint32) []int32) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToInt32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint64x8ConvertLoToInt32x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x4, want func(x []uint64) []int32) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt8x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int32x8, want func(x []int8) []int32) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int32x8, want func(x []int16) []int32) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int32x8, want func(x []int32) []int32) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt64x2ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int32x8, want func(x []int64) []int32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int32x8, want func(x []uint8) []int32) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int32x8, want func(x []uint16) []int32) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint32x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int32x8, want func(x []uint32) []int32) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x2ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int32x8, want func(x []uint64) []int32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int32x8, want func(x []int8) []int32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt16x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int32x8, want func(x []int16) []int32) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int32x8, want func(x []int32) []int32) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int32x8, want func(x []int64) []int32) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint8x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int32x8, want func(x []uint8) []int32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int32x8, want func(x []uint16) []int32) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int32x8, want func(x []uint32) []int32) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint64x4ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int32x8, want func(x []uint64) []int32) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x64ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int32x8, want func(x []int8) []int32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int32x8, want func(x []int16) []int32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt32x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int32x8, want func(x []int32) []int32) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int32x8, want func(x []int64) []int32) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x64ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int32x8, want func(x []uint8) []int32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint16x32ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int32x8, want func(x []uint16) []int32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x16ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int32x8, want func(x []uint32) []int32) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToInt32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x8ConvertLoToInt32x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int32x8, want func(x []uint64) []int32) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt8x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x4, want func(x []int8) []uint32) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt16x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x4, want func(x []int16) []uint32) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x4, want func(x []int32) []uint32) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt64x2ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x4, want func(x []int64) []uint32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint8x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x4, want func(x []uint8) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x4, want func(x []uint16) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint32x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x4, want func(x []uint32) []uint32) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint64x2ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x4, want func(x []uint64) []uint32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt8x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x4, want func(x []int8) []uint32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt16x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x4, want func(x []int16) []uint32) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt32x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x4, want func(x []int32) []uint32) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt64x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x4, want func(x []int64) []uint32) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint8x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x4, want func(x []uint8) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint16x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x4, want func(x []uint16) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint32x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x4, want func(x []uint32) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint64x4ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x4, want func(x []uint64) []uint32) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt8x64ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x4, want func(x []int8) []uint32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt16x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x4, want func(x []int16) []uint32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testInt32x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x4, want func(x []int32) []uint32) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testInt64x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x4, want func(x []int64) []uint32) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint8x64ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x4, want func(x []uint8) []uint32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. 
+func testUint16x32ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x4, want func(x []uint16) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint32x16ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x4, want func(x []uint32) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToUint32x4 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 4 elements. +func testUint64x8ConvertLoToUint32x4(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x4, want func(x []uint64) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint32, 4) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt8x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint32x8, want func(x []int8) []uint32) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint32x8, want func(x []int16) []uint32) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint32x8, want func(x []int32) []uint32) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt64x2ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint32x8, want func(x []int64) []uint32) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint32x8, want func(x []uint8) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint32x8, want func(x []uint16) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint32x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint32x8, want func(x []uint32) []uint32) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x2ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint32x8, want func(x []uint64) []uint32) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint32x8, want func(x []int8) []uint32) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt16x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint32x8, want func(x []int16) []uint32) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint32x8, want func(x []int32) []uint32) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint32x8, want func(x []int64) []uint32) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint8x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint32x8, want func(x []uint8) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint32x8, want func(x []uint16) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint32x8, want func(x []uint32) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint64x4ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint32x8, want func(x []uint64) []uint32) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x64ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint32x8, want func(x []int8) []uint32) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint32x8, want func(x []int16) []uint32) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt32x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint32x8, want func(x []int32) []uint32) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint32x8, want func(x []int64) []uint32) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x64ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint32x8, want func(x []uint8) []uint32) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint16x32ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint32x8, want func(x []uint16) []uint32) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x16ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint32x8, want func(x []uint32) []uint32) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToUint32x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x8ConvertLoToUint32x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint32x8, want func(x []uint64) []uint32) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint32, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt8x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Int16x8, want func(x []int8) []int16) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Int16x8, want func(x []int16) []int16) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Int16x8, want func(x []int32) []int16) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt64x2ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Int16x8, want func(x []int64) []int16) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Int16x8, want func(x []uint8) []int16) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Int16x8, want func(x []uint16) []int16) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint32x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Int16x8, want func(x []uint32) []int16) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x2ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Int16x8, want func(x []int8) []int16) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt16x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Int16x8, want func(x []int16) []int16) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Int16x8, want func(x []int32) []int16) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Int16x8, want func(x []int64) []int16) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint8x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Int16x8, want func(x []uint8) []int16) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Int16x8, want func(x []uint16) []int16) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Int16x8, want func(x []uint32) []int16) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint64x4ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x64ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Int16x8, want func(x []int8) []int16) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Int16x8, want func(x []int16) []int16) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt32x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Int16x8, want func(x []int32) []int16) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Int16x8, want func(x []int64) []int16) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x64ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Int16x8, want func(x []uint8) []int16) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint16x32ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Int16x8, want func(x []uint16) []int16) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x16ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Int16x8, want func(x []uint32) []int16) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToInt16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x8ConvertLoToInt16x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Int16x8, want func(x []uint64) []int16) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]int16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt8x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x16) archsimd.Uint16x8, want func(x []int8) []uint16) { + n := 16 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x8) archsimd.Uint16x8, want func(x []int16) []uint16) { + n := 8 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x4) archsimd.Uint16x8, want func(x []int32) []uint16) { + n := 4 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x2ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt64x2ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x2) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 2 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x2Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x16) archsimd.Uint16x8, want func(x []uint8) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x8) archsimd.Uint16x8, want func(x []uint16) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint32x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x4) archsimd.Uint16x8, want func(x []uint32) []uint16) { + n := 4 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x2ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint64x2ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x2) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 2 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x2Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x32) archsimd.Uint16x8, want func(x []int8) []uint16) { + n := 32 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x32Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt16x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x16) archsimd.Uint16x8, want func(x []int16) []uint16) { + n := 16 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt32x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x8) archsimd.Uint16x8, want func(x []int32) []uint16) { + n := 8 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x4) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 4 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint8x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x32) archsimd.Uint16x8, want func(x []uint8) []uint16) { + n := 32 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x32Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint16x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x16) archsimd.Uint16x8, want func(x []uint16) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x8) archsimd.Uint16x8, want func(x []uint32) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x4ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint64x4ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x4) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 4 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x4Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt8x64ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt8x64ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int8x64) archsimd.Uint16x8, want func(x []int8) []uint16) { + n := 64 + t.Helper() + forSlice(t, int8s, n, func(x []int8) bool { + t.Helper() + a := archsimd.LoadInt8x64Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt16x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt16x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int16x32) archsimd.Uint16x8, want func(x []int16) []uint16) { + n := 32 + t.Helper() + forSlice(t, int16s, n, func(x []int16) bool { + t.Helper() + a := archsimd.LoadInt16x32Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt32x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testInt32x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int32x16) archsimd.Uint16x8, want func(x []int32) []uint16) { + n := 16 + t.Helper() + forSlice(t, int32s, n, func(x []int32) bool { + t.Helper() + a := archsimd.LoadInt32x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testInt64x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testInt64x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Int64x8) archsimd.Uint16x8, want func(x []int64) []uint16) { + n := 8 + t.Helper() + forSlice(t, int64s, n, func(x []int64) bool { + t.Helper() + a := archsimd.LoadInt64x8Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint8x64ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint8x64ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint8x64) archsimd.Uint16x8, want func(x []uint8) []uint16) { + n := 64 + t.Helper() + forSlice(t, uint8s, n, func(x []uint8) bool { + t.Helper() + a := archsimd.LoadUint8x64Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint16x32ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint16x32ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint16x32) archsimd.Uint16x8, want func(x []uint16) []uint16) { + n := 32 + t.Helper() + forSlice(t, uint16s, n, func(x []uint16) bool { + t.Helper() + a := archsimd.LoadUint16x32Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint32x16ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. +func testUint32x16ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint32x16) archsimd.Uint16x8, want func(x []uint32) []uint16) { + n := 16 + t.Helper() + forSlice(t, uint32s, n, func(x []uint32) bool { + t.Helper() + a := archsimd.LoadUint32x16Slice(x) + g := make([]uint16, 8) + f(a).StoreSlice(g) + w := want(x) + return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) + }) +} + +// testUint64x8ConvertLoToUint16x8 tests the simd conversion method f against the expected behavior generated by want. +// This converts only the low 8 elements. 
+func testUint64x8ConvertLoToUint16x8(t *testing.T, f func(x archsimd.Uint64x8) archsimd.Uint16x8, want func(x []uint64) []uint16) { + n := 8 + t.Helper() + forSlice(t, uint64s, n, func(x []uint64) bool { + t.Helper() + a := archsimd.LoadUint64x8Slice(x) + g := make([]uint16, 8) f(a).StoreSlice(g) w := want(x) return checkSlicesLogInput(t, g, w, 0.0, func() { t.Helper(); t.Logf("x=%v", x) }) diff --git a/src/simd/archsimd/internal/simd_test/unary_test.go b/src/simd/archsimd/internal/simd_test/unary_test.go index 6b53669d78..097feb60ed 100644 --- a/src/simd/archsimd/internal/simd_test/unary_test.go +++ b/src/simd/archsimd/internal/simd_test/unary_test.go @@ -69,20 +69,26 @@ func TestSqrt(t *testing.T) { func TestNot(t *testing.T) { testInt8x16Unary(t, archsimd.Int8x16.Not, map1[int8](not)) - testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not)) testInt16x8Unary(t, archsimd.Int16x8.Not, map1[int16](not)) - testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not)) testInt32x4Unary(t, archsimd.Int32x4.Not, map1[int32](not)) - testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not)) + + if archsimd.X86.AVX2() { + testInt8x32Unary(t, archsimd.Int8x32.Not, map1[int8](not)) + testInt16x16Unary(t, archsimd.Int16x16.Not, map1[int16](not)) + testInt32x8Unary(t, archsimd.Int32x8.Not, map1[int32](not)) + } } func TestAbsolute(t *testing.T) { testInt8x16Unary(t, archsimd.Int8x16.Abs, map1[int8](abs)) - testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs)) testInt16x8Unary(t, archsimd.Int16x8.Abs, map1[int16](abs)) - testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs)) testInt32x4Unary(t, archsimd.Int32x4.Abs, map1[int32](abs)) - testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs)) + + if archsimd.X86.AVX2() { + testInt8x32Unary(t, archsimd.Int8x32.Abs, map1[int8](abs)) + testInt16x16Unary(t, archsimd.Int16x16.Abs, map1[int16](abs)) + testInt32x8Unary(t, archsimd.Int32x8.Abs, map1[int32](abs)) + } if archsimd.X86.AVX512() { testInt8x64Unary(t, 
archsimd.Int8x64.Abs, map1[int8](abs)) testInt16x32Unary(t, archsimd.Int16x32.Abs, map1[int16](abs)) @@ -110,28 +116,199 @@ func TestCeilScaledResidue(t *testing.T) { map1[float64](func(x float64) float64 { return x - math.Ceil(x) })) } -func TestToUint32(t *testing.T) { - if !archsimd.X86.AVX512() { - t.Skip("Needs AVX512") +func TestConvert(t *testing.T) { + testFloat64x2ConvertToFloat32(t, archsimd.Float64x2.ConvertToFloat32, map1n[float64](toFloat32, 4)) + testFloat64x4ConvertToFloat32(t, archsimd.Float64x4.ConvertToFloat32, map1[float64](toFloat32)) + testFloat32x4ConvertToFloat64(t, archsimd.Float32x4.ConvertToFloat64, map1[float32](toFloat64)) + + testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](floatToInt32_x86)) + testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](floatToInt32_x86)) + testFloat64x2ConvertToInt32(t, archsimd.Float64x2.ConvertToInt32, map1n[float64](floatToInt32_x86, 4)) + testFloat64x4ConvertToInt32(t, archsimd.Float64x4.ConvertToInt32, map1[float64](floatToInt32_x86)) + + testInt32x4ConvertToFloat32(t, archsimd.Int32x4.ConvertToFloat32, map1[int32](toFloat32)) + testInt32x8ConvertToFloat32(t, archsimd.Int32x8.ConvertToFloat32, map1[int32](toFloat32)) + testInt32x4ConvertToFloat64(t, archsimd.Int32x4.ConvertToFloat64, map1[int32](toFloat64)) + + if archsimd.X86.AVX512() { + testFloat32x8ConvertToFloat64(t, archsimd.Float32x8.ConvertToFloat64, map1[float32](toFloat64)) + testFloat64x8ConvertToFloat32(t, archsimd.Float64x8.ConvertToFloat32, map1[float64](toFloat32)) + + testFloat32x16ConvertToInt32(t, archsimd.Float32x16.ConvertToInt32, map1[float32](floatToInt32_x86)) + testFloat64x8ConvertToInt32(t, archsimd.Float64x8.ConvertToInt32, map1[float64](floatToInt32_x86)) + testFloat32x4ConvertToInt64(t, archsimd.Float32x4.ConvertToInt64, map1[float32](floatToInt64_x86)) + testFloat32x8ConvertToInt64(t, archsimd.Float32x8.ConvertToInt64, map1[float32](floatToInt64_x86)) + 
testFloat64x2ConvertToInt64(t, archsimd.Float64x2.ConvertToInt64, map1[float64](floatToInt64_x86)) + testFloat64x4ConvertToInt64(t, archsimd.Float64x4.ConvertToInt64, map1[float64](floatToInt64_x86)) + testFloat64x8ConvertToInt64(t, archsimd.Float64x8.ConvertToInt64, map1[float64](floatToInt64_x86)) + + testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](floatToUint32_x86)) + testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](floatToUint32_x86)) + testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](floatToUint32_x86)) + testFloat64x2ConvertToUint32(t, archsimd.Float64x2.ConvertToUint32, map1n[float64](floatToUint32_x86, 4)) + testFloat64x4ConvertToUint32(t, archsimd.Float64x4.ConvertToUint32, map1[float64](floatToUint32_x86)) + testFloat64x8ConvertToUint32(t, archsimd.Float64x8.ConvertToUint32, map1[float64](floatToUint32_x86)) + testFloat32x4ConvertToUint64(t, archsimd.Float32x4.ConvertToUint64, map1[float32](floatToUint64_x86)) + testFloat32x8ConvertToUint64(t, archsimd.Float32x8.ConvertToUint64, map1[float32](floatToUint64_x86)) + testFloat64x2ConvertToUint64(t, archsimd.Float64x2.ConvertToUint64, map1[float64](floatToUint64_x86)) + testFloat64x4ConvertToUint64(t, archsimd.Float64x4.ConvertToUint64, map1[float64](floatToUint64_x86)) + testFloat64x8ConvertToUint64(t, archsimd.Float64x8.ConvertToUint64, map1[float64](floatToUint64_x86)) + + testInt32x16ConvertToFloat32(t, archsimd.Int32x16.ConvertToFloat32, map1[int32](toFloat32)) + testInt64x2ConvertToFloat32(t, archsimd.Int64x2.ConvertToFloat32, map1n[int64](toFloat32, 4)) + testInt64x4ConvertToFloat32(t, archsimd.Int64x4.ConvertToFloat32, map1[int64](toFloat32)) + testInt64x8ConvertToFloat32(t, archsimd.Int64x8.ConvertToFloat32, map1[int64](toFloat32)) + testInt64x2ConvertToFloat64(t, archsimd.Int64x2.ConvertToFloat64, map1[int64](toFloat64)) + testInt64x4ConvertToFloat64(t, archsimd.Int64x4.ConvertToFloat64, 
map1[int64](toFloat64)) + testInt64x8ConvertToFloat64(t, archsimd.Int64x8.ConvertToFloat64, map1[int64](toFloat64)) + + testUint32x4ConvertToFloat32(t, archsimd.Uint32x4.ConvertToFloat32, map1[uint32](toFloat32)) + testUint32x8ConvertToFloat32(t, archsimd.Uint32x8.ConvertToFloat32, map1[uint32](toFloat32)) + testUint32x16ConvertToFloat32(t, archsimd.Uint32x16.ConvertToFloat32, map1[uint32](toFloat32)) + testUint64x2ConvertToFloat32(t, archsimd.Uint64x2.ConvertToFloat32, map1n[uint64](toFloat32, 4)) + testUint64x4ConvertToFloat32(t, archsimd.Uint64x4.ConvertToFloat32, map1[uint64](toFloat32)) + testUint64x8ConvertToFloat32(t, archsimd.Uint64x8.ConvertToFloat32, map1[uint64](toFloat32)) + testUint32x4ConvertToFloat64(t, archsimd.Uint32x4.ConvertToFloat64, map1[uint32](toFloat64)) + testUint32x8ConvertToFloat64(t, archsimd.Uint32x8.ConvertToFloat64, map1[uint32](toFloat64)) + testUint64x2ConvertToFloat64(t, archsimd.Uint64x2.ConvertToFloat64, map1[uint64](toFloat64)) + testUint64x4ConvertToFloat64(t, archsimd.Uint64x4.ConvertToFloat64, map1[uint64](toFloat64)) + testUint64x8ConvertToFloat64(t, archsimd.Uint64x8.ConvertToFloat64, map1[uint64](toFloat64)) + } +} + +func TestExtend(t *testing.T) { + if archsimd.X86.AVX2() { + testInt8x16ConvertToInt16(t, archsimd.Int8x16.ExtendToInt16, map1[int8](toInt16)) + testInt16x8ConvertToInt32(t, archsimd.Int16x8.ExtendToInt32, map1[int16](toInt32)) + testInt32x4ConvertToInt64(t, archsimd.Int32x4.ExtendToInt64, map1[int32](toInt64)) + testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16)) + testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32)) + testUint32x4ConvertToUint64(t, archsimd.Uint32x4.ExtendToUint64, map1[uint32](toUint64)) + } + + if archsimd.X86.AVX512() { + testInt8x32ConvertToInt16(t, archsimd.Int8x32.ExtendToInt16, map1[int8](toInt16)) + testInt8x16ConvertToInt32(t, archsimd.Int8x16.ExtendToInt32, map1[int8](toInt32)) + testInt16x16ConvertToInt32(t, 
archsimd.Int16x16.ExtendToInt32, map1[int16](toInt32)) + testInt16x8ConvertToInt64(t, archsimd.Int16x8.ExtendToInt64, map1[int16](toInt64)) + testInt32x8ConvertToInt64(t, archsimd.Int32x8.ExtendToInt64, map1[int32](toInt64)) + testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16)) + testUint8x16ConvertToUint32(t, archsimd.Uint8x16.ExtendToUint32, map1[uint8](toUint32)) + testUint16x16ConvertToUint32(t, archsimd.Uint16x16.ExtendToUint32, map1[uint16](toUint32)) + testUint16x8ConvertToUint64(t, archsimd.Uint16x8.ExtendToUint64, map1[uint16](toUint64)) + testUint32x8ConvertToUint64(t, archsimd.Uint32x8.ExtendToUint64, map1[uint32](toUint64)) } - testFloat32x4ConvertToUint32(t, archsimd.Float32x4.ConvertToUint32, map1[float32](toUint32)) - testFloat32x8ConvertToUint32(t, archsimd.Float32x8.ConvertToUint32, map1[float32](toUint32)) - testFloat32x16ConvertToUint32(t, archsimd.Float32x16.ConvertToUint32, map1[float32](toUint32)) } -func TestToInt32(t *testing.T) { - testFloat32x4ConvertToInt32(t, archsimd.Float32x4.ConvertToInt32, map1[float32](toInt32)) - testFloat32x8ConvertToInt32(t, archsimd.Float32x8.ConvertToInt32, map1[float32](toInt32)) +func TestExtendLo(t *testing.T) { + testInt8x16ConvertLoToInt64x2(t, archsimd.Int8x16.ExtendLo2ToInt64, map1n[int8](toInt64, 2)) + testInt16x8ConvertLoToInt64x2(t, archsimd.Int16x8.ExtendLo2ToInt64, map1n[int16](toInt64, 2)) + testInt32x4ConvertLoToInt64x2(t, archsimd.Int32x4.ExtendLo2ToInt64, map1n[int32](toInt64, 2)) + testUint8x16ConvertLoToUint64x2(t, archsimd.Uint8x16.ExtendLo2ToUint64, map1n[uint8](toUint64, 2)) + testUint16x8ConvertLoToUint64x2(t, archsimd.Uint16x8.ExtendLo2ToUint64, map1n[uint16](toUint64, 2)) + testUint32x4ConvertLoToUint64x2(t, archsimd.Uint32x4.ExtendLo2ToUint64, map1n[uint32](toUint64, 2)) + testInt8x16ConvertLoToInt32x4(t, archsimd.Int8x16.ExtendLo4ToInt32, map1n[int8](toInt32, 4)) + testInt16x8ConvertLoToInt32x4(t, archsimd.Int16x8.ExtendLo4ToInt32, 
map1n[int16](toInt32, 4)) + testUint8x16ConvertLoToUint32x4(t, archsimd.Uint8x16.ExtendLo4ToUint32, map1n[uint8](toUint32, 4)) + testUint16x8ConvertLoToUint32x4(t, archsimd.Uint16x8.ExtendLo4ToUint32, map1n[uint16](toUint32, 4)) + testInt8x16ConvertLoToInt16x8(t, archsimd.Int8x16.ExtendLo8ToInt16, map1n[int8](toInt16, 8)) + testUint8x16ConvertLoToUint16x8(t, archsimd.Uint8x16.ExtendLo8ToUint16, map1n[uint8](toUint16, 8)) + + if archsimd.X86.AVX2() { + testInt8x16ConvertLoToInt64x4(t, archsimd.Int8x16.ExtendLo4ToInt64, map1n[int8](toInt64, 4)) + testInt16x8ConvertLoToInt64x4(t, archsimd.Int16x8.ExtendLo4ToInt64, map1n[int16](toInt64, 4)) + testUint8x16ConvertLoToUint64x4(t, archsimd.Uint8x16.ExtendLo4ToUint64, map1n[uint8](toUint64, 4)) + testUint16x8ConvertLoToUint64x4(t, archsimd.Uint16x8.ExtendLo4ToUint64, map1n[uint16](toUint64, 4)) + testInt8x16ConvertLoToInt32x8(t, archsimd.Int8x16.ExtendLo8ToInt32, map1n[int8](toInt32, 8)) + testUint8x16ConvertLoToUint32x8(t, archsimd.Uint8x16.ExtendLo8ToUint32, map1n[uint8](toUint32, 8)) + } + + if archsimd.X86.AVX512() { + testInt8x16ConvertToInt64(t, archsimd.Int8x16.ExtendLo8ToInt64, map1n[int8](toInt64, 8)) + testUint8x16ConvertToUint64(t, archsimd.Uint8x16.ExtendLo8ToUint64, map1n[uint8](toUint64, 8)) + } } -func TestConverts(t *testing.T) { - testUint8x16ConvertToUint16(t, archsimd.Uint8x16.ExtendToUint16, map1[uint8](toUint16)) - testUint16x8ConvertToUint32(t, archsimd.Uint16x8.ExtendToUint32, map1[uint16](toUint32)) +func TestTruncate(t *testing.T) { + if archsimd.X86.AVX512() { + testInt16x8ConvertToInt8(t, archsimd.Int16x8.TruncateToInt8, map1n[int16](toInt8, 16)) + testInt16x16ConvertToInt8(t, archsimd.Int16x16.TruncateToInt8, map1[int16](toInt8)) + testInt16x32ConvertToInt8(t, archsimd.Int16x32.TruncateToInt8, map1[int16](toInt8)) + testInt32x4ConvertToInt8(t, archsimd.Int32x4.TruncateToInt8, map1n[int32](toInt8, 16)) + testInt32x8ConvertToInt8(t, archsimd.Int32x8.TruncateToInt8, map1n[int32](toInt8, 16)) + 
testInt32x16ConvertToInt8(t, archsimd.Int32x16.TruncateToInt8, map1[int32](toInt8)) + testInt64x2ConvertToInt8(t, archsimd.Int64x2.TruncateToInt8, map1n[int64](toInt8, 16)) + testInt64x4ConvertToInt8(t, archsimd.Int64x4.TruncateToInt8, map1n[int64](toInt8, 16)) + testInt64x8ConvertToInt8(t, archsimd.Int64x8.TruncateToInt8, map1n[int64](toInt8, 16)) + testInt32x4ConvertToInt16(t, archsimd.Int32x4.TruncateToInt16, map1n[int32](toInt16, 8)) + testInt32x8ConvertToInt16(t, archsimd.Int32x8.TruncateToInt16, map1[int32](toInt16)) + testInt32x16ConvertToInt16(t, archsimd.Int32x16.TruncateToInt16, map1[int32](toInt16)) + testInt64x2ConvertToInt16(t, archsimd.Int64x2.TruncateToInt16, map1n[int64](toInt16, 8)) + testInt64x4ConvertToInt16(t, archsimd.Int64x4.TruncateToInt16, map1n[int64](toInt16, 8)) + testInt64x8ConvertToInt16(t, archsimd.Int64x8.TruncateToInt16, map1[int64](toInt16)) + testInt64x2ConvertToInt32(t, archsimd.Int64x2.TruncateToInt32, map1n[int64](toInt32, 4)) + testInt64x4ConvertToInt32(t, archsimd.Int64x4.TruncateToInt32, map1[int64](toInt32)) + testInt64x8ConvertToInt32(t, archsimd.Int64x8.TruncateToInt32, map1[int64](toInt32)) + + testUint16x8ConvertToUint8(t, archsimd.Uint16x8.TruncateToUint8, map1n[uint16](toUint8, 16)) + testUint16x16ConvertToUint8(t, archsimd.Uint16x16.TruncateToUint8, map1[uint16](toUint8)) + testUint16x32ConvertToUint8(t, archsimd.Uint16x32.TruncateToUint8, map1[uint16](toUint8)) + testUint32x4ConvertToUint8(t, archsimd.Uint32x4.TruncateToUint8, map1n[uint32](toUint8, 16)) + testUint32x8ConvertToUint8(t, archsimd.Uint32x8.TruncateToUint8, map1n[uint32](toUint8, 16)) + testUint32x16ConvertToUint8(t, archsimd.Uint32x16.TruncateToUint8, map1[uint32](toUint8)) + testUint64x2ConvertToUint8(t, archsimd.Uint64x2.TruncateToUint8, map1n[uint64](toUint8, 16)) + testUint64x4ConvertToUint8(t, archsimd.Uint64x4.TruncateToUint8, map1n[uint64](toUint8, 16)) + testUint64x8ConvertToUint8(t, archsimd.Uint64x8.TruncateToUint8, map1n[uint64](toUint8, 16)) 
+ testUint32x4ConvertToUint16(t, archsimd.Uint32x4.TruncateToUint16, map1n[uint32](toUint16, 8)) + testUint32x8ConvertToUint16(t, archsimd.Uint32x8.TruncateToUint16, map1[uint32](toUint16)) + testUint32x16ConvertToUint16(t, archsimd.Uint32x16.TruncateToUint16, map1[uint32](toUint16)) + testUint64x2ConvertToUint16(t, archsimd.Uint64x2.TruncateToUint16, map1n[uint64](toUint16, 8)) + testUint64x4ConvertToUint16(t, archsimd.Uint64x4.TruncateToUint16, map1n[uint64](toUint16, 8)) + testUint64x8ConvertToUint16(t, archsimd.Uint64x8.TruncateToUint16, map1[uint64](toUint16)) + testUint64x2ConvertToUint32(t, archsimd.Uint64x2.TruncateToUint32, map1n[uint64](toUint32, 4)) + testUint64x4ConvertToUint32(t, archsimd.Uint64x4.TruncateToUint32, map1[uint64](toUint32)) + testUint64x8ConvertToUint32(t, archsimd.Uint64x8.TruncateToUint32, map1[uint64](toUint32)) + } } -func TestConvertsAVX512(t *testing.T) { - if !archsimd.X86.AVX512() { - t.Skip("Needs AVX512") +func TestSaturate(t *testing.T) { + if archsimd.X86.AVX512() { + testInt16x8ConvertToInt8(t, archsimd.Int16x8.SaturateToInt8, map1n[int16](satToInt8, 16)) + testInt16x16ConvertToInt8(t, archsimd.Int16x16.SaturateToInt8, map1[int16](satToInt8)) + testInt16x32ConvertToInt8(t, archsimd.Int16x32.SaturateToInt8, map1[int16](satToInt8)) + testInt32x4ConvertToInt8(t, archsimd.Int32x4.SaturateToInt8, map1n[int32](satToInt8, 16)) + testInt32x8ConvertToInt8(t, archsimd.Int32x8.SaturateToInt8, map1n[int32](satToInt8, 16)) + testInt32x16ConvertToInt8(t, archsimd.Int32x16.SaturateToInt8, map1[int32](satToInt8)) + testInt64x2ConvertToInt8(t, archsimd.Int64x2.SaturateToInt8, map1n[int64](satToInt8, 16)) + testInt64x4ConvertToInt8(t, archsimd.Int64x4.SaturateToInt8, map1n[int64](satToInt8, 16)) + testInt64x8ConvertToInt8(t, archsimd.Int64x8.SaturateToInt8, map1n[int64](satToInt8, 16)) + testInt32x4ConvertToInt16(t, archsimd.Int32x4.SaturateToInt16, map1n[int32](satToInt16, 8)) + testInt32x8ConvertToInt16(t, archsimd.Int32x8.SaturateToInt16, 
map1[int32](satToInt16)) + testInt32x16ConvertToInt16(t, archsimd.Int32x16.SaturateToInt16, map1[int32](satToInt16)) + testInt64x2ConvertToInt16(t, archsimd.Int64x2.SaturateToInt16, map1n[int64](satToInt16, 8)) + testInt64x4ConvertToInt16(t, archsimd.Int64x4.SaturateToInt16, map1n[int64](satToInt16, 8)) + testInt64x8ConvertToInt16(t, archsimd.Int64x8.SaturateToInt16, map1[int64](satToInt16)) + testInt64x2ConvertToInt32(t, archsimd.Int64x2.SaturateToInt32, map1n[int64](satToInt32, 4)) + testInt64x4ConvertToInt32(t, archsimd.Int64x4.SaturateToInt32, map1[int64](satToInt32)) + testInt64x8ConvertToInt32(t, archsimd.Int64x8.SaturateToInt32, map1[int64](satToInt32)) + + testUint16x8ConvertToUint8(t, archsimd.Uint16x8.SaturateToUint8, map1n[uint16](satToUint8, 16)) + testUint16x16ConvertToUint8(t, archsimd.Uint16x16.SaturateToUint8, map1[uint16](satToUint8)) + testUint16x32ConvertToUint8(t, archsimd.Uint16x32.SaturateToUint8, map1[uint16](satToUint8)) + testUint32x4ConvertToUint8(t, archsimd.Uint32x4.SaturateToUint8, map1n[uint32](satToUint8, 16)) + testUint32x8ConvertToUint8(t, archsimd.Uint32x8.SaturateToUint8, map1n[uint32](satToUint8, 16)) + testUint32x16ConvertToUint8(t, archsimd.Uint32x16.SaturateToUint8, map1[uint32](satToUint8)) + testUint64x2ConvertToUint8(t, archsimd.Uint64x2.SaturateToUint8, map1n[uint64](satToUint8, 16)) + testUint64x4ConvertToUint8(t, archsimd.Uint64x4.SaturateToUint8, map1n[uint64](satToUint8, 16)) + testUint64x8ConvertToUint8(t, archsimd.Uint64x8.SaturateToUint8, map1n[uint64](satToUint8, 16)) + testUint32x4ConvertToUint16(t, archsimd.Uint32x4.SaturateToUint16, map1n[uint32](satToUint16, 8)) + testUint32x8ConvertToUint16(t, archsimd.Uint32x8.SaturateToUint16, map1[uint32](satToUint16)) + testUint32x16ConvertToUint16(t, archsimd.Uint32x16.SaturateToUint16, map1[uint32](satToUint16)) + testUint64x2ConvertToUint16(t, archsimd.Uint64x2.SaturateToUint16, map1n[uint64](satToUint16, 8)) + testUint64x4ConvertToUint16(t, 
archsimd.Uint64x4.SaturateToUint16, map1n[uint64](satToUint16, 8)) + testUint64x8ConvertToUint16(t, archsimd.Uint64x8.SaturateToUint16, map1[uint64](satToUint16)) + testUint64x2ConvertToUint32(t, archsimd.Uint64x2.SaturateToUint32, map1n[uint64](satToUint32, 4)) + testUint64x4ConvertToUint32(t, archsimd.Uint64x4.SaturateToUint32, map1[uint64](satToUint32)) + testUint64x8ConvertToUint32(t, archsimd.Uint64x8.SaturateToUint32, map1[uint64](satToUint32)) } - testUint8x32ConvertToUint16(t, archsimd.Uint8x32.ExtendToUint16, map1[uint8](toUint16)) } diff --git a/src/simd/archsimd/maskmerge_gen_amd64.go b/src/simd/archsimd/maskmerge_gen_amd64.go index 5e9ea394b3..ad56521714 100644 --- a/src/simd/archsimd/maskmerge_gen_amd64.go +++ b/src/simd/archsimd/maskmerge_gen_amd64.go @@ -1,4 +1,4 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. //go:build goexperiment.simd @@ -286,7 +286,7 @@ func (x Int8x64) Masked(mask Mask8x64) Int8x64 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int8x64) Merge(y Int8x64, mask Mask8x64) Int8x64 { return y.blendMasked(x, mask) } @@ -297,7 +297,7 @@ func (x Int16x32) Masked(mask Mask16x32) Int16x32 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int16x32) Merge(y Int16x32, mask Mask16x32) Int16x32 { return y.blendMasked(x, mask) } @@ -308,7 +308,7 @@ func (x Int32x16) Masked(mask Mask32x16) Int32x16 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. 
func (x Int32x16) Merge(y Int32x16, mask Mask32x16) Int32x16 { return y.blendMasked(x, mask) } @@ -319,7 +319,7 @@ func (x Int64x8) Masked(mask Mask64x8) Int64x8 { return im.And(x) } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Int64x8) Merge(y Int64x8, mask Mask64x8) Int64x8 { return y.blendMasked(x, mask) } @@ -330,7 +330,7 @@ func (x Uint8x64) Masked(mask Mask8x64) Uint8x64 { return x.AsInt8x64().And(im).AsUint8x64() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint8x64) Merge(y Uint8x64, mask Mask8x64) Uint8x64 { ix := x.AsInt8x64() iy := y.AsInt8x64() @@ -343,7 +343,7 @@ func (x Uint16x32) Masked(mask Mask16x32) Uint16x32 { return x.AsInt16x32().And(im).AsUint16x32() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint16x32) Merge(y Uint16x32, mask Mask16x32) Uint16x32 { ix := x.AsInt16x32() iy := y.AsInt16x32() @@ -356,7 +356,7 @@ func (x Uint32x16) Masked(mask Mask32x16) Uint32x16 { return x.AsInt32x16().And(im).AsUint32x16() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Uint32x16) Merge(y Uint32x16, mask Mask32x16) Uint32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -369,7 +369,7 @@ func (x Uint64x8) Masked(mask Mask64x8) Uint64x8 { return x.AsInt64x8().And(im).AsUint64x8() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. 
func (x Uint64x8) Merge(y Uint64x8, mask Mask64x8) Uint64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8() @@ -382,7 +382,7 @@ func (x Float32x16) Masked(mask Mask32x16) Float32x16 { return x.AsInt32x16().And(im).AsFloat32x16() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Float32x16) Merge(y Float32x16, mask Mask32x16) Float32x16 { ix := x.AsInt32x16() iy := y.AsInt32x16() @@ -395,7 +395,7 @@ func (x Float64x8) Masked(mask Mask64x8) Float64x8 { return x.AsInt64x8().And(im).AsFloat64x8() } -// Merge returns x but with elements set to y where m is false. +// Merge returns x but with elements set to y where mask is false. func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 { ix := x.AsInt64x8() iy := y.AsInt64x8() diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index acd5719e6e..eba340c793 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. //go:build goexperiment.simd @@ -349,90 +349,101 @@ func (x Uint64x8) Add(y Uint64x8) Uint64x8 /* AddPairs */ // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // // Asm: VHADDPS, CPU Feature: AVX func (x Float32x4) AddPairs(y Float32x4) Float32x4 // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
-// -// Asm: VHADDPS, CPU Feature: AVX -func (x Float32x8) AddPairs(y Float32x8) Float32x8 - -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1]. // // Asm: VHADDPD, CPU Feature: AVX func (x Float64x2) AddPairs(y Float64x2) Float64x2 // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // -// Asm: VHADDPD, CPU Feature: AVX -func (x Float64x4) AddPairs(y Float64x4) Float64x4 +// Asm: VPHADDW, CPU Feature: AVX +func (x Int16x8) AddPairs(y Int16x8) Int16x8 // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // -// Asm: VPHADDW, CPU Feature: AVX -func (x Int16x8) AddPairs(y Int16x8) Int16x8 +// Asm: VPHADDD, CPU Feature: AVX +func (x Int32x4) AddPairs(y Int32x4) Int32x4 // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // -// Asm: VPHADDW, CPU Feature: AVX2 -func (x Int16x16) AddPairs(y Int16x16) Int16x16 +// Asm: VPHADDW, CPU Feature: AVX +func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8 // AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // // Asm: VPHADDD, CPU Feature: AVX -func (x Int32x4) AddPairs(y Int32x4) Int32x4 +func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4 -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +/* AddPairsGrouped */ + +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // -// Asm: VPHADDD, CPU Feature: AVX2 -func (x Int32x8) AddPairs(y Int32x8) Int32x8 +// Asm: VHADDPS, CPU Feature: AVX +func (x Float32x8) AddPairsGrouped(y Float32x8) Float32x8 -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1] and y = [y0, y1], the result is [x0+x1, y0+y1]. // -// Asm: VPHADDW, CPU Feature: AVX -func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8 +// Asm: VHADDPD, CPU Feature: AVX +func (x Float64x4) AddPairsGrouped(y Float64x4) Float64x4 -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. 
// // Asm: VPHADDW, CPU Feature: AVX2 -func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16 +func (x Int16x16) AddPairsGrouped(y Int16x16) Int16x16 -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // -// Asm: VPHADDD, CPU Feature: AVX -func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4 +// Asm: VPHADDD, CPU Feature: AVX2 +func (x Int32x8) AddPairsGrouped(y Int32x8) Int32x8 -// AddPairs horizontally adds adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. +// +// Asm: VPHADDW, CPU Feature: AVX2 +func (x Uint16x16) AddPairsGrouped(y Uint16x16) Uint16x16 + +// AddPairsGrouped horizontally adds adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // // Asm: VPHADDD, CPU Feature: AVX2 -func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8 +func (x Uint32x8) AddPairsGrouped(y Uint32x8) Uint32x8 /* AddPairsSaturated */ // AddPairsSaturated horizontally adds adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. 
// // Asm: VPHADDSW, CPU Feature: AVX func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8 -// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +/* AddPairsSaturatedGrouped */ + +// AddPairsSaturatedGrouped horizontally adds adjacent pairs of elements with saturation. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0+x1, x2+x3, ..., y0+y1, y2+y3, ...]. // // Asm: VPHADDSW, CPU Feature: AVX2 -func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16 +func (x Int16x16) AddPairsSaturatedGrouped(y Int16x16) Int16x16 /* AddSaturated */ @@ -1275,7 +1286,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 /* ConcatPermute */ // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1283,7 +1296,9 @@ func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1291,7 +1306,9 @@ func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16 func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1299,7 +1316,9 @@ func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16 func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1307,7 +1326,9 @@ func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32 func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1315,7 +1336,9 @@ func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32 func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1323,7 +1346,9 @@ func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64 func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1331,7 +1356,9 @@ func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64 func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1339,7 +1366,9 @@ func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8 func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1347,7 +1376,9 @@ func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8 func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1355,7 +1386,9 @@ func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16 func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1363,7 +1396,9 @@ func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16 func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1371,7 +1406,9 @@ func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32 func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1379,7 +1416,9 @@ func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32 func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1387,7 +1426,9 @@ func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4 func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1395,7 +1436,9 @@ func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4 func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1403,7 +1446,9 @@ func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4 func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1411,7 +1456,9 @@ func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8 func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1419,7 +1466,9 @@ func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8 func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1427,7 +1476,9 @@ func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8 func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1435,7 +1486,9 @@ func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16 func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1443,7 +1496,9 @@ func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16 func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1451,7 +1506,9 @@ func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16 func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1459,7 +1516,9 @@ func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2 func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1467,7 +1526,9 @@ func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2 func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1475,7 +1536,9 @@ func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2 func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1483,7 +1546,9 @@ func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4 func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1491,7 +1556,9 @@ func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4 func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1499,7 +1566,9 @@ func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4 func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. 
// @@ -1507,7 +1576,9 @@ func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8 func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8 // ConcatPermute performs a full permutation of vector x, y using indices: -// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// +// result = {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// // where xy is the concatenation of x (lower half) and y (upper half). // Only the needed bits to represent xy's index are used in indices' elements. // @@ -1516,33 +1587,33 @@ func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8 /* ConcatShiftBytesRight */ -// ConcatShiftBytesRight concatenates x and y and shift it right by constant bytes. +// ConcatShiftBytesRight concatenates x and y and shift it right by shift bytes. // The result vector will be the lower half of the concatenated vector. // -// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPALIGNR, CPU Feature: AVX -func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16 +func (x Uint8x16) ConcatShiftBytesRight(shift uint8, y Uint8x16) Uint8x16 /* ConcatShiftBytesRightGrouped */ -// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes. +// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by shift bytes. // The result vector will be the lower half of the concatenated vector. // This operation is performed grouped by each 16 byte. // -// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPALIGNR, CPU Feature: AVX2 -func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32 +func (x Uint8x32) ConcatShiftBytesRightGrouped(shift uint8, y Uint8x32) Uint8x32 -// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by constant bytes. +// ConcatShiftBytesRightGrouped concatenates x and y and shift it right by shift bytes. // The result vector will be the lower half of the concatenated vector. // This operation is performed grouped by each 16 byte. // -// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPALIGNR, CPU Feature: AVX512 -func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64 +func (x Uint8x64) ConcatShiftBytesRightGrouped(shift uint8, y Uint8x64) Uint8x64 /* ConvertToFloat32 */ @@ -1872,38 +1943,38 @@ func (x Float64x8) ConvertToUint64() Uint64x8 /* CopySign */ -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. // // Asm: VPSIGNB, CPU Feature: AVX func (x Int8x16) CopySign(y Int8x16) Int8x16 -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. // // Asm: VPSIGNB, CPU Feature: AVX2 func (x Int8x32) CopySign(y Int8x32) Int8x32 -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. 
// // Asm: VPSIGNW, CPU Feature: AVX func (x Int16x8) CopySign(y Int16x8) Int16x8 -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. // // Asm: VPSIGNW, CPU Feature: AVX2 func (x Int16x16) CopySign(y Int16x16) Int16x16 -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. // // Asm: VPSIGND, CPU Feature: AVX func (x Int32x4) CopySign(y Int32x4) Int32x4 -// CopySign returns the product of the first operand with -1, 0, or 1, -// whichever constant is nearest to the value of the second operand. +// CopySign returns the product of x with -1, 0, or 1, +// whichever constant is nearest to the value of y. // // Asm: VPSIGND, CPU Feature: AVX2 func (x Int32x8) CopySign(y Int32x8) Int32x8 @@ -1980,194 +2051,154 @@ func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16 // Asm: VPMADDUBSW, CPU Feature: AVX512 func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32 -/* DotProductQuadruple */ - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. -// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4 - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. -// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8 - -// DotProductQuadruple performs dot products on groups of 4 elements of x and y. 
-// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16 - -/* DotProductQuadrupleSaturated */ - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. -// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4 - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. -// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8 - -// DotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y. -// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16 - /* Equal */ -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQB, CPU Feature: AVX func (x Int8x16) Equal(y Int8x16) Mask8x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQB, CPU Feature: AVX2 func (x Int8x32) Equal(y Int8x32) Mask8x32 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQB, CPU Feature: AVX512 func (x Int8x64) Equal(y Int8x64) Mask8x64 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. 
// // Asm: VPCMPEQW, CPU Feature: AVX func (x Int16x8) Equal(y Int16x8) Mask16x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQW, CPU Feature: AVX2 func (x Int16x16) Equal(y Int16x16) Mask16x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQW, CPU Feature: AVX512 func (x Int16x32) Equal(y Int16x32) Mask16x32 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX func (x Int32x4) Equal(y Int32x4) Mask32x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX2 func (x Int32x8) Equal(y Int32x8) Mask32x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX512 func (x Int32x16) Equal(y Int32x16) Mask32x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQQ, CPU Feature: AVX func (x Int64x2) Equal(y Int64x2) Mask64x2 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQQ, CPU Feature: AVX2 func (x Int64x4) Equal(y Int64x4) Mask64x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQQ, CPU Feature: AVX512 func (x Int64x8) Equal(y Int64x8) Mask64x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQB, CPU Feature: AVX func (x Uint8x16) Equal(y Uint8x16) Mask8x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. 
// // Asm: VPCMPEQB, CPU Feature: AVX2 func (x Uint8x32) Equal(y Uint8x32) Mask8x32 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQB, CPU Feature: AVX512 func (x Uint8x64) Equal(y Uint8x64) Mask8x64 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQW, CPU Feature: AVX func (x Uint16x8) Equal(y Uint16x8) Mask16x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQW, CPU Feature: AVX2 func (x Uint16x16) Equal(y Uint16x16) Mask16x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQW, CPU Feature: AVX512 func (x Uint16x32) Equal(y Uint16x32) Mask16x32 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX func (x Uint32x4) Equal(y Uint32x4) Mask32x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX2 func (x Uint32x8) Equal(y Uint32x8) Mask32x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQD, CPU Feature: AVX512 func (x Uint32x16) Equal(y Uint32x16) Mask32x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQQ, CPU Feature: AVX func (x Uint64x2) Equal(y Uint64x2) Mask64x2 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VPCMPEQQ, CPU Feature: AVX2 func (x Uint64x4) Equal(y Uint64x4) Mask64x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. 
// // Asm: VPCMPEQQ, CPU Feature: AVX512 func (x Uint64x8) Equal(y Uint64x8) Mask64x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) Equal(y Float32x4) Mask32x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Equal(y Float32x8) Mask32x8 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) Equal(y Float32x16) Mask32x16 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) Equal(y Float64x2) Mask64x2 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Equal(y Float64x4) Mask64x4 -// Equal returns x equals y, elementwise. +// Equal returns a mask whose elements indicate whether x == y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) Equal(y Float64x8) Mask64x8 @@ -2354,254 +2385,218 @@ func (x Uint64x4) Expand(mask Mask64x4) Uint64x4 // Asm: VPEXPANDQ, CPU Feature: AVX512 func (x Uint64x8) Expand(mask Mask64x8) Uint64x8 -/* ExtendLo2ToInt64x2 */ +/* ExtendLo2ToInt64 */ -// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX -func (x Int8x16) ExtendLo2ToInt64x2() Int64x2 +func (x Int8x16) ExtendLo2ToInt64() Int64x2 -// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. 
// // Asm: VPMOVSXWQ, CPU Feature: AVX -func (x Int16x8) ExtendLo2ToInt64x2() Int64x2 +func (x Int16x8) ExtendLo2ToInt64() Int64x2 -// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. // // Asm: VPMOVSXDQ, CPU Feature: AVX -func (x Int32x4) ExtendLo2ToInt64x2() Int64x2 +func (x Int32x4) ExtendLo2ToInt64() Int64x2 -/* ExtendLo2ToUint64x2 */ +/* ExtendLo2ToUint64 */ -// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXBQ, CPU Feature: AVX -func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint8x16) ExtendLo2ToUint64() Uint64x2 -// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXWQ, CPU Feature: AVX -func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint16x8) ExtendLo2ToUint64() Uint64x2 -// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXDQ, CPU Feature: AVX -func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint32x4) ExtendLo2ToUint64() Uint64x2 -/* ExtendLo4ToInt32x4 */ +/* ExtendLo4ToInt32 */ -// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32. -// The result vector's elements are sign-extended. +// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32. // // Asm: VPMOVSXBD, CPU Feature: AVX -func (x Int8x16) ExtendLo4ToInt32x4() Int32x4 +func (x Int8x16) ExtendLo4ToInt32() Int32x4 -// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32. 
-// The result vector's elements are sign-extended. +// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32. // // Asm: VPMOVSXWD, CPU Feature: AVX -func (x Int16x8) ExtendLo4ToInt32x4() Int32x4 +func (x Int16x8) ExtendLo4ToInt32() Int32x4 -/* ExtendLo4ToInt64x4 */ +/* ExtendLo4ToInt64 */ -// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX2 -func (x Int8x16) ExtendLo4ToInt64x4() Int64x4 +func (x Int8x16) ExtendLo4ToInt64() Int64x4 -// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64. // // Asm: VPMOVSXWQ, CPU Feature: AVX2 -func (x Int16x8) ExtendLo4ToInt64x4() Int64x4 +func (x Int16x8) ExtendLo4ToInt64() Int64x4 -/* ExtendLo4ToUint32x4 */ +/* ExtendLo4ToUint32 */ -// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32. -// The result vector's elements are zero-extended. +// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32. // // Asm: VPMOVZXBD, CPU Feature: AVX -func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4 +func (x Uint8x16) ExtendLo4ToUint32() Uint32x4 -// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32. -// The result vector's elements are zero-extended. +// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32. // // Asm: VPMOVZXWD, CPU Feature: AVX -func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4 +func (x Uint16x8) ExtendLo4ToUint32() Uint32x4 -/* ExtendLo4ToUint64x4 */ +/* ExtendLo4ToUint64 */ -// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64. 
// // Asm: VPMOVZXBQ, CPU Feature: AVX2 -func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4 +func (x Uint8x16) ExtendLo4ToUint64() Uint64x4 -// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64. // // Asm: VPMOVZXWQ, CPU Feature: AVX2 -func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4 +func (x Uint16x8) ExtendLo4ToUint64() Uint64x4 -/* ExtendLo8ToInt16x8 */ +/* ExtendLo8ToInt16 */ -// ExtendLo8ToInt16x8 converts 8 lowest vector element values to int16. -// The result vector's elements are sign-extended. +// ExtendLo8ToInt16 sign-extends 8 lowest vector element values to int16. // // Asm: VPMOVSXBW, CPU Feature: AVX -func (x Int8x16) ExtendLo8ToInt16x8() Int16x8 +func (x Int8x16) ExtendLo8ToInt16() Int16x8 -/* ExtendLo8ToInt32x8 */ +/* ExtendLo8ToInt32 */ -// ExtendLo8ToInt32x8 converts 8 lowest vector element values to int32. -// The result vector's elements are sign-extended. +// ExtendLo8ToInt32 sign-extends 8 lowest vector element values to int32. // // Asm: VPMOVSXBD, CPU Feature: AVX2 -func (x Int8x16) ExtendLo8ToInt32x8() Int32x8 +func (x Int8x16) ExtendLo8ToInt32() Int32x8 -/* ExtendLo8ToInt64x8 */ +/* ExtendLo8ToInt64 */ -// ExtendLo8ToInt64x8 converts 8 lowest vector element values to int64. -// The result vector's elements are sign-extended. +// ExtendLo8ToInt64 sign-extends 8 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX512 -func (x Int8x16) ExtendLo8ToInt64x8() Int64x8 +func (x Int8x16) ExtendLo8ToInt64() Int64x8 -/* ExtendLo8ToUint16x8 */ +/* ExtendLo8ToUint16 */ -// ExtendLo8ToUint16x8 converts 8 lowest vector element values to uint16. -// The result vector's elements are zero-extended. +// ExtendLo8ToUint16 zero-extends 8 lowest vector element values to uint16. 
// // Asm: VPMOVZXBW, CPU Feature: AVX -func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8 +func (x Uint8x16) ExtendLo8ToUint16() Uint16x8 -/* ExtendLo8ToUint32x8 */ +/* ExtendLo8ToUint32 */ -// ExtendLo8ToUint32x8 converts 8 lowest vector element values to uint32. -// The result vector's elements are zero-extended. +// ExtendLo8ToUint32 zero-extends 8 lowest vector element values to uint32. // // Asm: VPMOVZXBD, CPU Feature: AVX2 -func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8 +func (x Uint8x16) ExtendLo8ToUint32() Uint32x8 -/* ExtendLo8ToUint64x8 */ +/* ExtendLo8ToUint64 */ -// ExtendLo8ToUint64x8 converts 8 lowest vector element values to uint64. -// The result vector's elements are zero-extended. +// ExtendLo8ToUint64 zero-extends 8 lowest vector element values to uint64. // // Asm: VPMOVZXBQ, CPU Feature: AVX512 -func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8 +func (x Uint8x16) ExtendLo8ToUint64() Uint64x8 /* ExtendToInt16 */ -// ExtendToInt16 converts element values to int16. -// The result vector's elements are sign-extended. +// ExtendToInt16 sign-extends element values to int16. // // Asm: VPMOVSXBW, CPU Feature: AVX2 func (x Int8x16) ExtendToInt16() Int16x16 -// ExtendToInt16 converts element values to int16. -// The result vector's elements are sign-extended. +// ExtendToInt16 sign-extends element values to int16. // // Asm: VPMOVSXBW, CPU Feature: AVX512 func (x Int8x32) ExtendToInt16() Int16x32 /* ExtendToInt32 */ -// ExtendToInt32 converts element values to int32. -// The result vector's elements are sign-extended. +// ExtendToInt32 sign-extends element values to int32. // // Asm: VPMOVSXBD, CPU Feature: AVX512 func (x Int8x16) ExtendToInt32() Int32x16 -// ExtendToInt32 converts element values to int32. -// The result vector's elements are sign-extended. +// ExtendToInt32 sign-extends element values to int32. // // Asm: VPMOVSXWD, CPU Feature: AVX2 func (x Int16x8) ExtendToInt32() Int32x8 -// ExtendToInt32 converts element values to int32. 
-// The result vector's elements are sign-extended. +// ExtendToInt32 sign-extends element values to int32. // // Asm: VPMOVSXWD, CPU Feature: AVX512 func (x Int16x16) ExtendToInt32() Int32x16 /* ExtendToInt64 */ -// ExtendToInt64 converts element values to int64. -// The result vector's elements are sign-extended. +// ExtendToInt64 sign-extends element values to int64. // // Asm: VPMOVSXWQ, CPU Feature: AVX512 func (x Int16x8) ExtendToInt64() Int64x8 -// ExtendToInt64 converts element values to int64. -// The result vector's elements are sign-extended. +// ExtendToInt64 sign-extends element values to int64. // // Asm: VPMOVSXDQ, CPU Feature: AVX2 func (x Int32x4) ExtendToInt64() Int64x4 -// ExtendToInt64 converts element values to int64. -// The result vector's elements are sign-extended. +// ExtendToInt64 sign-extends element values to int64. // // Asm: VPMOVSXDQ, CPU Feature: AVX512 func (x Int32x8) ExtendToInt64() Int64x8 /* ExtendToUint16 */ -// ExtendToUint16 converts element values to uint16. -// The result vector's elements are zero-extended. +// ExtendToUint16 zero-extends element values to uint16. // // Asm: VPMOVZXBW, CPU Feature: AVX2 func (x Uint8x16) ExtendToUint16() Uint16x16 -// ExtendToUint16 converts element values to uint16. -// The result vector's elements are zero-extended. +// ExtendToUint16 zero-extends element values to uint16. // // Asm: VPMOVZXBW, CPU Feature: AVX512 func (x Uint8x32) ExtendToUint16() Uint16x32 /* ExtendToUint32 */ -// ExtendToUint32 converts element values to uint32. -// The result vector's elements are zero-extended. +// ExtendToUint32 zero-extends element values to uint32. // // Asm: VPMOVZXBD, CPU Feature: AVX512 func (x Uint8x16) ExtendToUint32() Uint32x16 -// ExtendToUint32 converts element values to uint32. -// The result vector's elements are zero-extended. +// ExtendToUint32 zero-extends element values to uint32. 
// // Asm: VPMOVZXWD, CPU Feature: AVX2 func (x Uint16x8) ExtendToUint32() Uint32x8 -// ExtendToUint32 converts element values to uint32. -// The result vector's elements are zero-extended. +// ExtendToUint32 zero-extends element values to uint32. // // Asm: VPMOVZXWD, CPU Feature: AVX512 func (x Uint16x16) ExtendToUint32() Uint32x16 /* ExtendToUint64 */ -// ExtendToUint64 converts element values to uint64. -// The result vector's elements are zero-extended. +// ExtendToUint64 zero-extends element values to uint64. // // Asm: VPMOVZXWQ, CPU Feature: AVX512 func (x Uint16x8) ExtendToUint64() Uint64x8 -// ExtendToUint64 converts element values to uint64. -// The result vector's elements are zero-extended. +// ExtendToUint64 zero-extends element values to uint64. // // Asm: VPMOVZXDQ, CPU Feature: AVX2 func (x Uint32x4) ExtendToUint64() Uint64x4 -// ExtendToUint64 converts element values to uint64. -// The result vector's elements are zero-extended. +// ExtendToUint64 zero-extends element values to uint64. // // Asm: VPMOVZXDQ, CPU Feature: AVX512 func (x Uint32x8) ExtendToUint64() Uint64x8 @@ -3081,184 +3076,184 @@ func (x Uint64x8) GetLo() Uint64x4 /* Greater */ -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTB, CPU Feature: AVX func (x Int8x16) Greater(y Int8x16) Mask8x16 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTB, CPU Feature: AVX2 func (x Int8x32) Greater(y Int8x32) Mask8x32 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTB, CPU Feature: AVX512 func (x Int8x64) Greater(y Int8x64) Mask8x64 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. 
// // Asm: VPCMPGTW, CPU Feature: AVX func (x Int16x8) Greater(y Int16x8) Mask16x8 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTW, CPU Feature: AVX2 func (x Int16x16) Greater(y Int16x16) Mask16x16 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTW, CPU Feature: AVX512 func (x Int16x32) Greater(y Int16x32) Mask16x32 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTD, CPU Feature: AVX func (x Int32x4) Greater(y Int32x4) Mask32x4 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTD, CPU Feature: AVX2 func (x Int32x8) Greater(y Int32x8) Mask32x8 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTD, CPU Feature: AVX512 func (x Int32x16) Greater(y Int32x16) Mask32x16 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTQ, CPU Feature: AVX func (x Int64x2) Greater(y Int64x2) Mask64x2 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTQ, CPU Feature: AVX2 func (x Int64x4) Greater(y Int64x4) Mask64x4 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPGTQ, CPU Feature: AVX512 func (x Int64x8) Greater(y Int64x8) Mask64x8 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) Greater(y Float32x4) Mask32x4 -// Greater returns x greater-than y, elementwise. 
+// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Greater(y Float32x8) Mask32x8 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) Greater(y Float32x16) Mask32x16 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) Greater(y Float64x2) Mask64x2 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Greater(y Float64x4) Mask64x4 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) Greater(y Float64x8) Mask64x8 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPUB, CPU Feature: AVX512 func (x Uint8x64) Greater(y Uint8x64) Mask8x64 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPUW, CPU Feature: AVX512 func (x Uint16x32) Greater(y Uint16x32) Mask16x32 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPUD, CPU Feature: AVX512 func (x Uint32x16) Greater(y Uint32x16) Mask32x16 -// Greater returns x greater-than y, elementwise. +// Greater returns a mask whose elements indicate whether x > y. // // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) Greater(y Uint64x8) Mask64x8 /* GreaterEqual */ -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. 
// // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPB, CPU Feature: AVX512 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPW, CPU Feature: AVX512 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPD, CPU Feature: AVX512 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 -// GreaterEqual returns x greater-than-or-equals y, elementwise. 
+// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPQ, CPU Feature: AVX512 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPUB, CPU Feature: AVX512 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPUW, CPU Feature: AVX512 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPUD, CPU Feature: AVX512 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 -// GreaterEqual returns x greater-than-or-equals y, elementwise. +// GreaterEqual returns a mask whose elements indicate whether x >= y. // // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 @@ -3451,38 +3446,6 @@ func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4 // Asm: VPUNPCKLQDQ, CPU Feature: AVX512 func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8 -/* IsNan */ - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) IsNan(y Float32x4) Mask32x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) IsNan(y Float32x8) Mask32x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX512 -func (x Float32x16) IsNan(y Float32x16) Mask32x16 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) IsNan(y Float64x2) Mask64x2 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). 
-// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) IsNan(y Float64x4) Mask64x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX512 -func (x Float64x8) IsNan(y Float64x8) Mask64x8 - /* LeadingZeros */ // LeadingZeros counts the leading zeros of each element in x. @@ -3547,448 +3510,448 @@ func (x Uint64x8) LeadingZeros() Uint64x8 /* Less */ -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) Less(y Float32x4) Mask32x4 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) Less(y Float32x8) Mask32x8 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) Less(y Float32x16) Mask32x16 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) Less(y Float64x2) Mask64x2 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) Less(y Float64x4) Mask64x4 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) Less(y Float64x8) Mask64x8 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPB, CPU Feature: AVX512 func (x Int8x64) Less(y Int8x64) Mask8x64 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPW, CPU Feature: AVX512 func (x Int16x32) Less(y Int16x32) Mask16x32 -// Less returns x less-than y, elementwise. 
+// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPD, CPU Feature: AVX512 func (x Int32x16) Less(y Int32x16) Mask32x16 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPQ, CPU Feature: AVX512 func (x Int64x8) Less(y Int64x8) Mask64x8 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPUB, CPU Feature: AVX512 func (x Uint8x64) Less(y Uint8x64) Mask8x64 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPUW, CPU Feature: AVX512 func (x Uint16x32) Less(y Uint16x32) Mask16x32 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPUD, CPU Feature: AVX512 func (x Uint32x16) Less(y Uint32x16) Mask32x16 -// Less returns x less-than y, elementwise. +// Less returns a mask whose elements indicate whether x < y. // // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) Less(y Uint64x8) Mask64x8 /* LessEqual */ -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) LessEqual(y Float32x4) Mask32x4 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) LessEqual(y Float32x8) Mask32x8 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) LessEqual(y Float32x16) Mask32x16 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. 
// // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) LessEqual(y Float64x2) Mask64x2 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) LessEqual(y Float64x4) Mask64x4 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) LessEqual(y Float64x8) Mask64x8 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPB, CPU Feature: AVX512 func (x Int8x64) LessEqual(y Int8x64) Mask8x64 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPW, CPU Feature: AVX512 func (x Int16x32) LessEqual(y Int16x32) Mask16x32 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPD, CPU Feature: AVX512 func (x Int32x16) LessEqual(y Int32x16) Mask32x16 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPQ, CPU Feature: AVX512 func (x Int64x8) LessEqual(y Int64x8) Mask64x8 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPUB, CPU Feature: AVX512 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPUW, CPU Feature: AVX512 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. 
// // Asm: VPCMPUD, CPU Feature: AVX512 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 -// LessEqual returns x less-than-or-equals y, elementwise. +// LessEqual returns a mask whose elements indicate whether x <= y. // // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 /* Max */ -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPS, CPU Feature: AVX func (x Float32x4) Max(y Float32x4) Float32x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPS, CPU Feature: AVX func (x Float32x8) Max(y Float32x8) Float32x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPS, CPU Feature: AVX512 func (x Float32x16) Max(y Float32x16) Float32x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPD, CPU Feature: AVX func (x Float64x2) Max(y Float64x2) Float64x2 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPD, CPU Feature: AVX func (x Float64x4) Max(y Float64x4) Float64x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VMAXPD, CPU Feature: AVX512 func (x Float64x8) Max(y Float64x8) Float64x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSB, CPU Feature: AVX func (x Int8x16) Max(y Int8x16) Int8x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. 
// // Asm: VPMAXSB, CPU Feature: AVX2 func (x Int8x32) Max(y Int8x32) Int8x32 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSB, CPU Feature: AVX512 func (x Int8x64) Max(y Int8x64) Int8x64 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSW, CPU Feature: AVX func (x Int16x8) Max(y Int16x8) Int16x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSW, CPU Feature: AVX2 func (x Int16x16) Max(y Int16x16) Int16x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSW, CPU Feature: AVX512 func (x Int16x32) Max(y Int16x32) Int16x32 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSD, CPU Feature: AVX func (x Int32x4) Max(y Int32x4) Int32x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSD, CPU Feature: AVX2 func (x Int32x8) Max(y Int32x8) Int32x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSD, CPU Feature: AVX512 func (x Int32x16) Max(y Int32x16) Int32x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSQ, CPU Feature: AVX512 func (x Int64x2) Max(y Int64x2) Int64x2 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. 
// // Asm: VPMAXSQ, CPU Feature: AVX512 func (x Int64x4) Max(y Int64x4) Int64x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXSQ, CPU Feature: AVX512 func (x Int64x8) Max(y Int64x8) Int64x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUB, CPU Feature: AVX func (x Uint8x16) Max(y Uint8x16) Uint8x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUB, CPU Feature: AVX2 func (x Uint8x32) Max(y Uint8x32) Uint8x32 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUB, CPU Feature: AVX512 func (x Uint8x64) Max(y Uint8x64) Uint8x64 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUW, CPU Feature: AVX func (x Uint16x8) Max(y Uint16x8) Uint16x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUW, CPU Feature: AVX2 func (x Uint16x16) Max(y Uint16x16) Uint16x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUW, CPU Feature: AVX512 func (x Uint16x32) Max(y Uint16x32) Uint16x32 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUD, CPU Feature: AVX func (x Uint32x4) Max(y Uint32x4) Uint32x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. 
// // Asm: VPMAXUD, CPU Feature: AVX2 func (x Uint32x8) Max(y Uint32x8) Uint32x8 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUD, CPU Feature: AVX512 func (x Uint32x16) Max(y Uint32x16) Uint32x16 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUQ, CPU Feature: AVX512 func (x Uint64x2) Max(y Uint64x2) Uint64x2 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUQ, CPU Feature: AVX512 func (x Uint64x4) Max(y Uint64x4) Uint64x4 -// Max computes the maximum of corresponding elements. +// Max computes the maximum of each pair of corresponding elements in x and y. // // Asm: VPMAXUQ, CPU Feature: AVX512 func (x Uint64x8) Max(y Uint64x8) Uint64x8 /* Min */ -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VMINPS, CPU Feature: AVX func (x Float32x4) Min(y Float32x4) Float32x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VMINPS, CPU Feature: AVX func (x Float32x8) Min(y Float32x8) Float32x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VMINPS, CPU Feature: AVX512 func (x Float32x16) Min(y Float32x16) Float32x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VMINPD, CPU Feature: AVX func (x Float64x2) Min(y Float64x2) Float64x2 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. 
// // Asm: VMINPD, CPU Feature: AVX func (x Float64x4) Min(y Float64x4) Float64x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VMINPD, CPU Feature: AVX512 func (x Float64x8) Min(y Float64x8) Float64x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSB, CPU Feature: AVX func (x Int8x16) Min(y Int8x16) Int8x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSB, CPU Feature: AVX2 func (x Int8x32) Min(y Int8x32) Int8x32 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSB, CPU Feature: AVX512 func (x Int8x64) Min(y Int8x64) Int8x64 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSW, CPU Feature: AVX func (x Int16x8) Min(y Int16x8) Int16x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSW, CPU Feature: AVX2 func (x Int16x16) Min(y Int16x16) Int16x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSW, CPU Feature: AVX512 func (x Int16x32) Min(y Int16x32) Int16x32 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSD, CPU Feature: AVX func (x Int32x4) Min(y Int32x4) Int32x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. 
// // Asm: VPMINSD, CPU Feature: AVX2 func (x Int32x8) Min(y Int32x8) Int32x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSD, CPU Feature: AVX512 func (x Int32x16) Min(y Int32x16) Int32x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSQ, CPU Feature: AVX512 func (x Int64x2) Min(y Int64x2) Int64x2 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSQ, CPU Feature: AVX512 func (x Int64x4) Min(y Int64x4) Int64x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINSQ, CPU Feature: AVX512 func (x Int64x8) Min(y Int64x8) Int64x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUB, CPU Feature: AVX func (x Uint8x16) Min(y Uint8x16) Uint8x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUB, CPU Feature: AVX2 func (x Uint8x32) Min(y Uint8x32) Uint8x32 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUB, CPU Feature: AVX512 func (x Uint8x64) Min(y Uint8x64) Uint8x64 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUW, CPU Feature: AVX func (x Uint16x8) Min(y Uint16x8) Uint16x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. 
// // Asm: VPMINUW, CPU Feature: AVX2 func (x Uint16x16) Min(y Uint16x16) Uint16x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUW, CPU Feature: AVX512 func (x Uint16x32) Min(y Uint16x32) Uint16x32 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUD, CPU Feature: AVX func (x Uint32x4) Min(y Uint32x4) Uint32x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUD, CPU Feature: AVX2 func (x Uint32x8) Min(y Uint32x8) Uint32x8 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUD, CPU Feature: AVX512 func (x Uint32x16) Min(y Uint32x16) Uint32x16 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUQ, CPU Feature: AVX512 func (x Uint64x2) Min(y Uint64x2) Uint64x2 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUQ, CPU Feature: AVX512 func (x Uint64x4) Min(y Uint64x4) Uint64x4 -// Min computes the minimum of corresponding elements. +// Min computes the minimum of each pair of corresponding elements in x and y. // // Asm: VPMINUQ, CPU Feature: AVX512 func (x Uint64x8) Min(y Uint64x8) Uint64x8 @@ -4182,25 +4145,25 @@ func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8 /* MulEvenWiden */ // MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. +// Result[i] = v1[2*i] * v2[2*i]. 
// // Asm: VPMULDQ, CPU Feature: AVX func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2 // MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. +// Result[i] = v1[2*i] * v2[2*i]. // // Asm: VPMULDQ, CPU Feature: AVX2 func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4 // MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. +// Result[i] = v1[2*i] * v2[2*i]. // // Asm: VPMULUDQ, CPU Feature: AVX func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2 // MulEvenWiden multiplies even-indexed elements, widening the result. -// Result[i] = v1.Even[i] * v2.Even[i]. +// Result[i] = v1[2*i] * v2[2*i]. // // Asm: VPMULUDQ, CPU Feature: AVX2 func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4 @@ -4271,72 +4234,72 @@ func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8 /* NotEqual */ -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x4) NotEqual(y Float32x4) Mask32x4 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPS, CPU Feature: AVX func (x Float32x8) NotEqual(y Float32x8) Mask32x8 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPS, CPU Feature: AVX512 func (x Float32x16) NotEqual(y Float32x16) Mask32x16 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x2) NotEqual(y Float64x2) Mask64x2 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPD, CPU Feature: AVX func (x Float64x4) NotEqual(y Float64x4) Mask64x4 -// NotEqual returns x not-equals y, elementwise. 
+// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VCMPPD, CPU Feature: AVX512 func (x Float64x8) NotEqual(y Float64x8) Mask64x8 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPB, CPU Feature: AVX512 func (x Int8x64) NotEqual(y Int8x64) Mask8x64 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPW, CPU Feature: AVX512 func (x Int16x32) NotEqual(y Int16x32) Mask16x32 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPD, CPU Feature: AVX512 func (x Int32x16) NotEqual(y Int32x16) Mask32x16 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPQ, CPU Feature: AVX512 func (x Int64x8) NotEqual(y Int64x8) Mask64x8 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPUB, CPU Feature: AVX512 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPUW, CPU Feature: AVX512 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. // // Asm: VPCMPUD, CPU Feature: AVX512 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 -// NotEqual returns x not-equals y, elementwise. +// NotEqual returns a mask whose elements indicate whether x != y. 
// // Asm: VPCMPUQ, CPU Feature: AVX512 func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 @@ -4588,169 +4551,217 @@ func (x Uint64x8) Or(y Uint64x8) Uint64x8 /* Permute */ // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x16) Permute(indices Uint8x16) Int8x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x16) Permute(indices Uint8x16) Uint8x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices is used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x32) Permute(indices Uint8x32) Int8x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices is used. 
// // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x32) Permute(indices Uint8x32) Uint8x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 6 bits (values 0-63) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 6 bits (values 0-63) of each element of indices is used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Int8x64) Permute(indices Uint8x64) Int8x64 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 6 bits (values 0-63) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 6 bits (values 0-63) of each element of indices is used. // // Asm: VPERMB, CPU Feature: AVX512VBMI func (x Uint8x64) Permute(indices Uint8x64) Uint8x64 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x8) Permute(indices Uint16x8) Int16x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. 
// // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x8) Permute(indices Uint16x8) Uint16x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x16) Permute(indices Uint16x16) Int16x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x16) Permute(indices Uint16x16) Uint16x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices is used. // // Asm: VPERMW, CPU Feature: AVX512 func (x Int16x32) Permute(indices Uint16x32) Int16x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 5 bits (values 0-31) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 5 bits (values 0-31) of each element of indices is used. 
// // Asm: VPERMW, CPU Feature: AVX512 func (x Uint16x32) Permute(indices Uint16x32) Uint16x32 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMPS, CPU Feature: AVX2 func (x Float32x8) Permute(indices Uint32x8) Float32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMD, CPU Feature: AVX2 func (x Int32x8) Permute(indices Uint32x8) Int32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMD, CPU Feature: AVX2 func (x Uint32x8) Permute(indices Uint32x8) Uint32x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. 
// // Asm: VPERMPS, CPU Feature: AVX512 func (x Float32x16) Permute(indices Uint32x16) Float32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMD, CPU Feature: AVX512 func (x Int32x16) Permute(indices Uint32x16) Int32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 4 bits (values 0-15) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 4 bits (values 0-15) of each element of indices is used. // // Asm: VPERMD, CPU Feature: AVX512 func (x Uint32x16) Permute(indices Uint32x16) Uint32x16 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices is used. // // Asm: VPERMPD, CPU Feature: AVX512 func (x Float64x4) Permute(indices Uint64x4) Float64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices is used. 
// // Asm: VPERMQ, CPU Feature: AVX512 func (x Int64x4) Permute(indices Uint64x4) Int64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 2 bits (values 0-3) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 2 bits (values 0-3) of each element of indices is used. // // Asm: VPERMQ, CPU Feature: AVX512 func (x Uint64x4) Permute(indices Uint64x4) Uint64x4 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMPD, CPU Feature: AVX512 func (x Float64x8) Permute(indices Uint64x8) Float64x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. // // Asm: VPERMQ, CPU Feature: AVX512 func (x Int64x8) Permute(indices Uint64x8) Int64x8 // Permute performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} -// The low 3 bits (values 0-7) of each element of indices is used +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// The low 3 bits (values 0-7) of each element of indices is used. 
// // Asm: VPERMQ, CPU Feature: AVX512 func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 @@ -4758,7 +4769,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 /* PermuteOrZero */ // PermuteOrZero performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// // The lower four bits of each byte-sized index in indices select an element from x, // unless the index's sign bit is set in which case zero is used instead. // @@ -4766,7 +4779,9 @@ func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16 // PermuteOrZero performs a full permutation of vector x using indices: -// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// +// result = {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// // The lower four bits of each byte-sized index in indices select an element from x, // unless the index's sign bit is set in which case zero is used instead. // @@ -4776,7 +4791,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16 /* PermuteOrZeroGrouped */ // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. 
@@ -4785,7 +4802,9 @@ func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16 func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -4794,7 +4813,9 @@ func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32 func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. 
@@ -4803,7 +4824,9 @@ func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64 func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32 // PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: -// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// // The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, // unless the index's sign bit is set in which case zero is used instead. // Each group is of size 128-bit. @@ -4877,84 +4900,84 @@ func (x Float64x8) ReciprocalSqrt() Float64x8 /* RotateAllLeft */ -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLD, CPU Feature: AVX512 func (x Int32x4) RotateAllLeft(shift uint8) Int32x4 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLD, CPU Feature: AVX512 func (x Int32x8) RotateAllLeft(shift uint8) Int32x8 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPROLD, CPU Feature: AVX512 func (x Int32x16) RotateAllLeft(shift uint8) Int32x16 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLQ, CPU Feature: AVX512 func (x Int64x2) RotateAllLeft(shift uint8) Int64x2 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLQ, CPU Feature: AVX512 func (x Int64x4) RotateAllLeft(shift uint8) Int64x4 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLQ, CPU Feature: AVX512 func (x Int64x8) RotateAllLeft(shift uint8) Int64x8 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLD, CPU Feature: AVX512 func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLD, CPU Feature: AVX512 func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLD, CPU Feature: AVX512 func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLQ, CPU Feature: AVX512 func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPROLQ, CPU Feature: AVX512 func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4 -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// RotateAllLeft rotates each element to the left by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -4963,84 +4986,84 @@ func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 /* RotateAllRight */ -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Int32x4) RotateAllRight(shift uint8) Int32x4 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Int32x8) RotateAllRight(shift uint8) Int32x8 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Int32x16) RotateAllRight(shift uint8) Int32x16 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORQ, CPU Feature: AVX512 func (x Int64x2) RotateAllRight(shift uint8) Int64x2 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPRORQ, CPU Feature: AVX512 func (x Int64x4) RotateAllRight(shift uint8) Int64x4 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORQ, CPU Feature: AVX512 func (x Int64x8) RotateAllRight(shift uint8) Int64x8 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORD, CPU Feature: AVX512 func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORQ, CPU Feature: AVX512 func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPRORQ, CPU Feature: AVX512 func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4 -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// RotateAllRight rotates each element to the right by the number of bits specified by shift. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -5173,22 +5196,22 @@ func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 /* RoundToEven */ -// RoundToEven rounds elements to the nearest integer. +// RoundToEven rounds elements to the nearest integer, rounding ties to even. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x4) RoundToEven() Float32x4 -// RoundToEven rounds elements to the nearest integer. +// RoundToEven rounds elements to the nearest integer, rounding ties to even. // // Asm: VROUNDPS, CPU Feature: AVX func (x Float32x8) RoundToEven() Float32x8 -// RoundToEven rounds elements to the nearest integer. +// RoundToEven rounds elements to the nearest integer, rounding ties to even. // // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x2) RoundToEven() Float64x2 -// RoundToEven rounds elements to the nearest integer. +// RoundToEven rounds elements to the nearest integer, rounding ties to even. 
// // Asm: VROUNDPD, CPU Feature: AVX func (x Float64x4) RoundToEven() Float64x4 @@ -5365,334 +5388,304 @@ func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4 /* SaturateToInt8 */ -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSWB, CPU Feature: AVX512 func (x Int16x8) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. // // Asm: VPMOVSWB, CPU Feature: AVX512 func (x Int16x16) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. +// SaturateToInt8 converts element values to int8 with signed saturation. // // Asm: VPMOVSWB, CPU Feature: AVX512 func (x Int16x32) SaturateToInt8() Int8x32 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSDB, CPU Feature: AVX512 func (x Int32x4) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
+// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSDB, CPU Feature: AVX512 func (x Int32x8) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. // // Asm: VPMOVSDB, CPU Feature: AVX512 func (x Int32x16) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSQB, CPU Feature: AVX512 func (x Int64x2) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSQB, CPU Feature: AVX512 func (x Int64x4) SaturateToInt8() Int8x16 -// SaturateToInt8 converts element values to int8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToInt8 converts element values to int8 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. 
// // Asm: VPMOVSQB, CPU Feature: AVX512 func (x Int64x8) SaturateToInt8() Int8x16 /* SaturateToInt16 */ -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSDW, CPU Feature: AVX512 func (x Int32x4) SaturateToInt16() Int16x8 -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. // // Asm: VPMOVSDW, CPU Feature: AVX512 func (x Int32x8) SaturateToInt16() Int16x8 -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. // // Asm: VPMOVSDW, CPU Feature: AVX512 func (x Int32x16) SaturateToInt16() Int16x16 -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSQW, CPU Feature: AVX512 func (x Int64x2) SaturateToInt16() Int16x8 -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSQW, CPU Feature: AVX512 func (x Int64x4) SaturateToInt16() Int16x8 -// SaturateToInt16 converts element values to int16. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16 converts element values to int16 with signed saturation. 
// // Asm: VPMOVSQW, CPU Feature: AVX512 func (x Int64x8) SaturateToInt16() Int16x8 /* SaturateToInt16Concat */ -// SaturateToInt16Concat converts element values to int16. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. +// SaturateToInt16Concat converts element values to int16 with signed saturation. +// The converted elements from x will be packed to the lower part of the result vector, +// the converted elements from y will be packed to the upper part of the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8 -// SaturateToInt16Concat converts element values to int16. +/* SaturateToInt16ConcatGrouped */ + +// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation. // With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX2 -func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16 +func (x Int32x8) SaturateToInt16ConcatGrouped(y Int32x8) Int16x16 -// SaturateToInt16Concat converts element values to int16. +// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation. 
// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX512 -func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32 +func (x Int32x16) SaturateToInt16ConcatGrouped(y Int32x16) Int16x32 /* SaturateToInt32 */ -// SaturateToInt32 converts element values to int32. -// Conversion is done with saturation on the vector elements. +// SaturateToInt32 converts element values to int32 with signed saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVSQD, CPU Feature: AVX512 func (x Int64x2) SaturateToInt32() Int32x4 -// SaturateToInt32 converts element values to int32. -// Conversion is done with saturation on the vector elements. +// SaturateToInt32 converts element values to int32 with signed saturation. // // Asm: VPMOVSQD, CPU Feature: AVX512 func (x Int64x4) SaturateToInt32() Int32x4 -// SaturateToInt32 converts element values to int32. -// Conversion is done with saturation on the vector elements. +// SaturateToInt32 converts element values to int32 with signed saturation. // // Asm: VPMOVSQD, CPU Feature: AVX512 func (x Int64x8) SaturateToInt32() Int32x8 /* SaturateToUint8 */ -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. 
+// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVSWB, CPU Feature: AVX512 -func (x Int16x8) SaturateToUint8() Int8x16 +// Asm: VPMOVUSWB, CPU Feature: AVX512 +func (x Uint16x8) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. // -// Asm: VPMOVSWB, CPU Feature: AVX512 -func (x Int16x16) SaturateToUint8() Int8x16 +// Asm: VPMOVUSWB, CPU Feature: AVX512 +func (x Uint16x16) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. // -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x4) SaturateToUint8() Int8x16 +// Asm: VPMOVUSWB, CPU Feature: AVX512 +func (x Uint16x32) SaturateToUint8() Uint8x32 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x8) SaturateToUint8() Int8x16 +// Asm: VPMOVUSDB, CPU Feature: AVX512 +func (x Uint32x4) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
+// SaturateToUint8 converts element values to uint8 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVSDB, CPU Feature: AVX512 -func (x Int32x16) SaturateToUint8() Int8x16 +// Asm: VPMOVUSDB, CPU Feature: AVX512 +func (x Uint32x8) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. // -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x2) SaturateToUint8() Int8x16 +// Asm: VPMOVUSDB, CPU Feature: AVX512 +func (x Uint32x16) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x4) SaturateToUint8() Int8x16 +// Asm: VPMOVUSQB, CPU Feature: AVX512 +func (x Uint64x2) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. -// Conversion is done with saturation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVSQB, CPU Feature: AVX512 -func (x Int64x8) SaturateToUint8() Int8x16 +// Asm: VPMOVUSQB, CPU Feature: AVX512 +func (x Uint64x4) SaturateToUint8() Uint8x16 -// SaturateToUint8 converts element values to uint8. 
-// Conversion is done with saturation on the vector elements. +// SaturateToUint8 converts element values to uint8 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // -// Asm: VPMOVUSWB, CPU Feature: AVX512 -func (x Uint16x32) SaturateToUint8() Uint8x32 +// Asm: VPMOVUSQB, CPU Feature: AVX512 +func (x Uint64x8) SaturateToUint8() Uint8x16 /* SaturateToUint16 */ -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVUSDW, CPU Feature: AVX512 func (x Uint32x4) SaturateToUint16() Uint16x8 -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. // // Asm: VPMOVUSDW, CPU Feature: AVX512 func (x Uint32x8) SaturateToUint16() Uint16x8 -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. // // Asm: VPMOVUSDW, CPU Feature: AVX512 func (x Uint32x16) SaturateToUint16() Uint16x16 -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVUSQW, CPU Feature: AVX512 func (x Uint64x2) SaturateToUint16() Uint16x8 -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. 
+// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVUSQW, CPU Feature: AVX512 func (x Uint64x4) SaturateToUint16() Uint16x8 -// SaturateToUint16 converts element values to uint16. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16 converts element values to uint16 with unsigned saturation. // // Asm: VPMOVUSQW, CPU Feature: AVX512 func (x Uint64x8) SaturateToUint16() Uint16x8 /* SaturateToUint16Concat */ -// SaturateToUint16Concat converts element values to uint16. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. +// SaturateToUint16Concat converts element values to uint16 with unsigned saturation. +// The converted elements from x will be packed to the lower part of the result vector, +// the converted elements from y will be packed to the upper part of the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX -func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8 +func (x Int32x4) SaturateToUint16Concat(y Int32x4) Uint16x8 + +/* SaturateToUint16ConcatGrouped */ -// SaturateToUint16Concat converts element values to uint16. +// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation. // With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. 
+// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX2 -func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16 +func (x Int32x8) SaturateToUint16ConcatGrouped(y Int32x8) Uint16x16 -// SaturateToUint16Concat converts element values to uint16. +// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation. // With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. -// Conversion is done with saturation on the vector elements. +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX512 -func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32 +func (x Int32x16) SaturateToUint16ConcatGrouped(y Int32x16) Uint16x32 /* SaturateToUint32 */ -// SaturateToUint32 converts element values to uint32. -// Conversion is done with saturation on the vector elements. +// SaturateToUint32 converts element values to uint32 with unsigned saturation. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVUSQD, CPU Feature: AVX512 func (x Uint64x2) SaturateToUint32() Uint32x4 -// SaturateToUint32 converts element values to uint32. -// Conversion is done with saturation on the vector elements. +// SaturateToUint32 converts element values to uint32 with unsigned saturation. // // Asm: VPMOVUSQD, CPU Feature: AVX512 func (x Uint64x4) SaturateToUint32() Uint32x4 -// SaturateToUint32 converts element values to uint32. 
-// Conversion is done with saturation on the vector elements. +// SaturateToUint32 converts element values to uint32 with unsigned saturation. // // Asm: VPMOVUSQD, CPU Feature: AVX512 func (x Uint64x8) SaturateToUint32() Uint32x8 /* Scale */ -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x4) Scale(y Float32x4) Float32x4 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x8) Scale(y Float32x8) Float32x8 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPS, CPU Feature: AVX512 func (x Float32x16) Scale(y Float32x16) Float32x16 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x2) Scale(y Float64x2) Float64x2 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x4) Scale(y Float64x4) Float64x4 -// Scale multiplies elements by a power of 2. +// Scale multiplies each element of x by 2 raised to the power of the +// floor of the corresponding element in y. // // Asm: VSCALEFPD, CPU Feature: AVX512 func (x Float64x8) Scale(y Float64x8) Float64x8 @@ -6131,236 +6124,236 @@ func (x Uint64x8) SetLo(y Uint64x4) Uint64x8 /* ShiftAllLeft */ -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
+// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX2 func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX512 func (x Int16x32) ShiftAllLeft(y uint64) Int16x32 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX2 func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX512 func (x Int32x16) ShiftAllLeft(y uint64) Int32x16 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLQ, CPU Feature: AVX func (x Int64x2) ShiftAllLeft(y uint64) Int64x2 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. 
// // Asm: VPSLLQ, CPU Feature: AVX2 func (x Int64x4) ShiftAllLeft(y uint64) Int64x4 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLQ, CPU Feature: AVX512 func (x Int64x8) ShiftAllLeft(y uint64) Int64x8 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLW, CPU Feature: AVX512 func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLD, CPU Feature: AVX512 func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16 -// ShiftAllLeft shifts each element to the left by the specified number of bits. 
Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLQ, CPU Feature: AVX func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLQ, CPU Feature: AVX2 func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// ShiftAllLeft shifts each element to the left by y bits. // // Asm: VPSLLQ, CPU Feature: AVX512 func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 /* ShiftAllLeftConcat */ -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 -// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -6369,236 +6362,236 @@ func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 /* ShiftAllRight */ -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
+// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAW, CPU Feature: AVX func (x Int16x8) ShiftAllRight(y uint64) Int16x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAW, CPU Feature: AVX2 func (x Int16x16) ShiftAllRight(y uint64) Int16x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAW, CPU Feature: AVX512 func (x Int16x32) ShiftAllRight(y uint64) Int16x32 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAD, CPU Feature: AVX func (x Int32x4) ShiftAllRight(y uint64) Int32x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAD, CPU Feature: AVX2 func (x Int32x8) ShiftAllRight(y uint64) Int32x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAD, CPU Feature: AVX512 func (x Int32x16) ShiftAllRight(y uint64) Int32x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. 
// // Asm: VPSRAQ, CPU Feature: AVX512 func (x Int64x2) ShiftAllRight(y uint64) Int64x2 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAQ, CPU Feature: AVX512 func (x Int64x4) ShiftAllRight(y uint64) Int64x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRight performs a signed right shift on each element by y bits. // // Asm: VPSRAQ, CPU Feature: AVX512 func (x Int64x8) ShiftAllRight(y uint64) Int64x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLW, CPU Feature: AVX func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLW, CPU Feature: AVX512 func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLD, CPU Feature: AVX func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
+// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLD, CPU Feature: AVX512 func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLQ, CPU Feature: AVX func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLQ, CPU Feature: AVX2 func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight performs an unsigned right shift on each element by y bits. // // Asm: VPSRLQ, CPU Feature: AVX512 func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 /* ShiftAllRightConcat */ -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
// // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4 -// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by +// shift (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. // @@ -6607,92 +6600,92 @@ func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8 /* ShiftLeft */ -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Int16x8) ShiftLeft(y Int16x8) Int16x8 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Int16x16) ShiftLeft(y Int16x16) Int16x16 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Int16x32) ShiftLeft(y Int16x32) Int16x32 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX2 func (x Int32x4) ShiftLeft(y Int32x4) Int32x4 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX2 func (x Int32x8) ShiftLeft(y Int32x8) Int32x8 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX512 func (x Int32x16) ShiftLeft(y Int32x16) Int32x16 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVQ, CPU Feature: AVX2 func (x Int64x2) ShiftLeft(y Int64x2) Int64x2 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVQ, CPU Feature: AVX2 func (x Int64x4) ShiftLeft(y Int64x4) Int64x4 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. 
// // Asm: VPSLLVQ, CPU Feature: AVX512 func (x Int64x8) ShiftLeft(y Int64x8) Int64x8 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVW, CPU Feature: AVX512 func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX2 func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX2 func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVD, CPU Feature: AVX512 func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVQ, CPU Feature: AVX2 func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVQ, CPU Feature: AVX2 func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4 -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. // // Asm: VPSLLVQ, CPU Feature: AVX512 func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8 @@ -6700,201 +6693,201 @@ func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8 /* ShiftLeftConcat */ // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4 // ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8 /* ShiftRight */ -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVW, CPU Feature: AVX512 func (x Int16x8) ShiftRight(y Int16x8) Int16x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVW, CPU Feature: AVX512 func (x Int16x16) ShiftRight(y Int16x16) Int16x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVW, CPU Feature: AVX512 func (x Int16x32) ShiftRight(y Int16x32) Int16x32 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x4) ShiftRight(y Int32x4) Int32x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x8) ShiftRight(y Int32x8) Int32x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVD, CPU Feature: AVX512 func (x Int32x16) ShiftRight(y Int32x16) Int32x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVQ, CPU Feature: AVX512 func (x Int64x2) ShiftRight(y Int64x2) Int64x2 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVQ, CPU Feature: AVX512 func (x Int64x4) ShiftRight(y Int64x4) Int64x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
+// ShiftRight performs a signed right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRAVQ, CPU Feature: AVX512 func (x Int64x8) ShiftRight(y Int64x8) Int64x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVW, CPU Feature: AVX512 func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVW, CPU Feature: AVX512 func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVW, CPU Feature: AVX512 func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVD, CPU Feature: AVX2 func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. 
// // Asm: VPSRLVD, CPU Feature: AVX2 func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVD, CPU Feature: AVX512 func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVQ, CPU Feature: AVX2 func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVQ, CPU Feature: AVX2 func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight performs an unsigned right shift on each element in x by the number of bits specified in y's corresponding elements. // // Asm: VPSRLVQ, CPU Feature: AVX512 func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8 @@ -6902,109 +6895,109 @@ func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8 /* ShiftRightConcat */ // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
// // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8 @@ -7196,90 +7189,101 @@ func (x Uint64x8) Sub(y Uint64x8) Uint64x8 /* SubPairs */ // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // // Asm: VHSUBPS, CPU Feature: AVX func (x Float32x4) SubPairs(y Float32x4) Float32x4 // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
-// -// Asm: VHSUBPS, CPU Feature: AVX -func (x Float32x8) SubPairs(y Float32x8) Float32x8 - -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1]. // // Asm: VHSUBPD, CPU Feature: AVX func (x Float64x2) SubPairs(y Float64x2) Float64x2 // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // -// Asm: VHSUBPD, CPU Feature: AVX -func (x Float64x4) SubPairs(y Float64x4) Float64x4 +// Asm: VPHSUBW, CPU Feature: AVX +func (x Int16x8) SubPairs(y Int16x8) Int16x8 // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // -// Asm: VPHSUBW, CPU Feature: AVX -func (x Int16x8) SubPairs(y Int16x8) Int16x8 +// Asm: VPHSUBD, CPU Feature: AVX +func (x Int32x4) SubPairs(y Int32x4) Int32x4 // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // -// Asm: VPHSUBW, CPU Feature: AVX2 -func (x Int16x16) SubPairs(y Int16x16) Int16x16 +// Asm: VPHSUBW, CPU Feature: AVX +func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8 // SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] 
and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX -func (x Int32x4) SubPairs(y Int32x4) Int32x4 +func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4 -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +/* SubPairsGrouped */ + +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // -// Asm: VPHSUBD, CPU Feature: AVX2 -func (x Int32x8) SubPairs(y Int32x8) Int32x8 +// Asm: VHSUBPS, CPU Feature: AVX +func (x Float32x8) SubPairsGrouped(y Float32x8) Float32x8 -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1] and y = [y0, y1], the result is [x0-x1, y0-y1]. // -// Asm: VPHSUBW, CPU Feature: AVX -func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8 +// Asm: VHSUBPD, CPU Feature: AVX +func (x Float64x4) SubPairsGrouped(y Float64x4) Float64x4 -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. 
// // Asm: VPHSUBW, CPU Feature: AVX2 -func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16 +func (x Int16x16) SubPairsGrouped(y Int16x16) Int16x16 -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // -// Asm: VPHSUBD, CPU Feature: AVX -func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4 +// Asm: VPHSUBD, CPU Feature: AVX2 +func (x Int32x8) SubPairsGrouped(y Int32x8) Int32x8 -// SubPairs horizontally subtracts adjacent pairs of elements. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. +// +// Asm: VPHSUBW, CPU Feature: AVX2 +func (x Uint16x16) SubPairsGrouped(y Uint16x16) Uint16x16 + +// SubPairsGrouped horizontally subtracts adjacent pairs of elements. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // // Asm: VPHSUBD, CPU Feature: AVX2 -func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8 +func (x Uint32x8) SubPairsGrouped(y Uint32x8) Uint32x8 /* SubPairsSaturated */ // SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. 
// // Asm: VPHSUBSW, CPU Feature: AVX func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8 -// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation. -// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +/* SubPairsSaturatedGrouped */ + +// SubPairsSaturatedGrouped horizontally subtracts adjacent pairs of elements with saturation. +// With each 128-bit as a group: +// for x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [x0-x1, x2-x3, ..., y0-y1, y2-y3, ...]. // // Asm: VPHSUBSW, CPU Feature: AVX2 -func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16 +func (x Int16x16) SubPairsSaturatedGrouped(y Int16x16) Int16x16 /* SubSaturated */ @@ -7478,244 +7482,212 @@ func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8 /* TruncateToInt8 */ -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Int16x8) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Int16x16) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. +// TruncateToInt8 truncates element values to int8. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Int16x32) TruncateToInt8() Int8x32 -// TruncateToInt8 converts element values to int8. 
-// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Int32x4) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Int32x8) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Int32x16) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQB, CPU Feature: AVX512 func (x Int64x2) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. 
// // Asm: VPMOVQB, CPU Feature: AVX512 func (x Int64x4) TruncateToInt8() Int8x16 -// TruncateToInt8 converts element values to int8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToInt8 truncates element values to int8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQB, CPU Feature: AVX512 func (x Int64x8) TruncateToInt8() Int8x16 /* TruncateToInt16 */ -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDW, CPU Feature: AVX512 func (x Int32x4) TruncateToInt16() Int16x8 -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. // // Asm: VPMOVDW, CPU Feature: AVX512 func (x Int32x8) TruncateToInt16() Int16x8 -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. // // Asm: VPMOVDW, CPU Feature: AVX512 func (x Int32x16) TruncateToInt16() Int16x16 -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Int64x2) TruncateToInt16() Int16x8 -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. 
+// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Int64x4) TruncateToInt16() Int16x8 -// TruncateToInt16 converts element values to int16. -// Conversion is done with truncation on the vector elements. +// TruncateToInt16 truncates element values to int16. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Int64x8) TruncateToInt16() Int16x8 /* TruncateToInt32 */ -// TruncateToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. +// TruncateToInt32 truncates element values to int32. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQD, CPU Feature: AVX512 func (x Int64x2) TruncateToInt32() Int32x4 -// TruncateToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. +// TruncateToInt32 truncates element values to int32. // // Asm: VPMOVQD, CPU Feature: AVX512 func (x Int64x4) TruncateToInt32() Int32x4 -// TruncateToInt32 converts element values to int32. -// Conversion is done with truncation on the vector elements. +// TruncateToInt32 truncates element values to int32. // // Asm: VPMOVQD, CPU Feature: AVX512 func (x Int64x8) TruncateToInt32() Int32x8 /* TruncateToUint8 */ -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Uint16x8) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. 
-// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Uint16x16) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. +// TruncateToUint8 truncates element values to uint8. // // Asm: VPMOVWB, CPU Feature: AVX512 func (x Uint16x32) TruncateToUint8() Uint8x32 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Uint32x4) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Uint32x8) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. // // Asm: VPMOVDB, CPU Feature: AVX512 func (x Uint32x16) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
+// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQB, CPU Feature: AVX512 func (x Uint64x2) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQB, CPU Feature: AVX512 func (x Uint64x4) TruncateToUint8() Uint8x16 -// TruncateToUint8 converts element values to uint8. -// Conversion is done with truncation on the vector elements. -// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// TruncateToUint8 truncates element values to uint8. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQB, CPU Feature: AVX512 func (x Uint64x8) TruncateToUint8() Uint8x16 /* TruncateToUint16 */ -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVDW, CPU Feature: AVX512 func (x Uint32x4) TruncateToUint16() Uint16x8 -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. // // Asm: VPMOVDW, CPU Feature: AVX512 func (x Uint32x8) TruncateToUint16() Uint16x8 -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. 
// // Asm: VPMOVDW, CPU Feature: AVX512 func (x Uint32x16) TruncateToUint16() Uint16x16 -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Uint64x2) TruncateToUint16() Uint16x8 -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Uint64x4) TruncateToUint16() Uint16x8 -// TruncateToUint16 converts element values to uint16. -// Conversion is done with truncation on the vector elements. +// TruncateToUint16 truncates element values to uint16. // // Asm: VPMOVQW, CPU Feature: AVX512 func (x Uint64x8) TruncateToUint16() Uint16x8 /* TruncateToUint32 */ -// TruncateToUint32 converts element values to uint32. -// Conversion is done with truncation on the vector elements. +// TruncateToUint32 truncates element values to uint32. +// Results are packed to low elements in the returned vector, its upper elements are zeroed. // // Asm: VPMOVQD, CPU Feature: AVX512 func (x Uint64x2) TruncateToUint32() Uint32x4 -// TruncateToUint32 converts element values to uint32. -// Conversion is done with truncation on the vector elements. +// TruncateToUint32 truncates element values to uint32. // // Asm: VPMOVQD, CPU Feature: AVX512 func (x Uint64x4) TruncateToUint32() Uint32x4 -// TruncateToUint32 converts element values to uint32. -// Conversion is done with truncation on the vector elements. +// TruncateToUint32 truncates element values to uint32. 
// // Asm: VPMOVQD, CPU Feature: AVX512 func (x Uint64x8) TruncateToUint32() Uint32x8 @@ -7842,930 +7814,930 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Asm: VPXORQ, CPU Feature: AVX512 func (x Uint64x8) Xor(y Uint64x8) Uint64x8 -// Float64x2 converts from Float32x4 to Float64x2 -func (from Float32x4) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Float32x4) AsFloat64x2() Float64x2 -// Int8x16 converts from Float32x4 to Int8x16 -func (from Float32x4) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Float32x4) AsInt8x16() Int8x16 -// Int16x8 converts from Float32x4 to Int16x8 -func (from Float32x4) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Float32x4) AsInt16x8() Int16x8 -// Int32x4 converts from Float32x4 to Int32x4 -func (from Float32x4) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Float32x4) AsInt32x4() Int32x4 -// Int64x2 converts from Float32x4 to Int64x2 -func (from Float32x4) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Float32x4) AsInt64x2() Int64x2 -// Uint8x16 converts from Float32x4 to Uint8x16 -func (from Float32x4) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Float32x4) AsUint8x16() Uint8x16 -// Uint16x8 converts from Float32x4 to Uint16x8 -func (from Float32x4) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Float32x4) AsUint16x8() Uint16x8 -// Uint32x4 converts from Float32x4 to Uint32x4 -func (from Float32x4) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Float32x4) AsUint32x4() Uint32x4 -// Uint64x2 converts from Float32x4 to Uint64x2 -func (from Float32x4) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Float32x4) AsUint64x2() Uint64x2 -// Float64x4 converts from Float32x8 to Float64x4 -func (from Float32x8) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Float32x8) AsFloat64x4() Float64x4 -// Int8x32 converts from Float32x8 to Int8x32 -func (from Float32x8) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Float32x8) AsInt8x32() Int8x32 -// Int16x16 converts from Float32x8 to Int16x16 -func (from Float32x8) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Float32x8) AsInt16x16() Int16x16 -// Int32x8 converts from Float32x8 to Int32x8 -func (from Float32x8) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Float32x8) AsInt32x8() Int32x8 -// Int64x4 converts from Float32x8 to Int64x4 -func (from Float32x8) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Float32x8) AsInt64x4() Int64x4 -// Uint8x32 converts from Float32x8 to Uint8x32 -func (from Float32x8) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Float32x8) AsUint8x32() Uint8x32 -// Uint16x16 converts from Float32x8 to Uint16x16 -func (from Float32x8) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Float32x8) AsUint16x16() Uint16x16 -// Uint32x8 converts from Float32x8 to Uint32x8 -func (from Float32x8) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Float32x8) AsUint32x8() Uint32x8 -// Uint64x4 converts from Float32x8 to Uint64x4 -func (from Float32x8) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Float32x8) AsUint64x4() Uint64x4 -// Float64x8 converts from Float32x16 to Float64x8 -func (from Float32x16) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Float32x16) AsFloat64x8() Float64x8 -// Int8x64 converts from Float32x16 to Int8x64 -func (from Float32x16) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Float32x16) AsInt8x64() Int8x64 -// Int16x32 converts from Float32x16 to Int16x32 -func (from Float32x16) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Float32x16) AsInt16x32() Int16x32 -// Int32x16 converts from Float32x16 to Int32x16 -func (from Float32x16) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Float32x16) AsInt32x16() Int32x16 -// Int64x8 converts from Float32x16 to Int64x8 -func (from Float32x16) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Float32x16) AsInt64x8() Int64x8 -// Uint8x64 converts from Float32x16 to Uint8x64 -func (from Float32x16) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Float32x16) AsUint8x64() Uint8x64 -// Uint16x32 converts from Float32x16 to Uint16x32 -func (from Float32x16) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Float32x16) AsUint16x32() Uint16x32 -// Uint32x16 converts from Float32x16 to Uint32x16 -func (from Float32x16) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Float32x16) AsUint32x16() Uint32x16 -// Uint64x8 converts from Float32x16 to Uint64x8 -func (from Float32x16) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Float32x16) AsUint64x8() Uint64x8 -// Float32x4 converts from Float64x2 to Float32x4 -func (from Float64x2) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Float64x2) AsFloat32x4() Float32x4 -// Int8x16 converts from Float64x2 to Int8x16 -func (from Float64x2) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Float64x2) AsInt8x16() Int8x16 -// Int16x8 converts from Float64x2 to Int16x8 -func (from Float64x2) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Float64x2) AsInt16x8() Int16x8 -// Int32x4 converts from Float64x2 to Int32x4 -func (from Float64x2) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Float64x2) AsInt32x4() Int32x4 -// Int64x2 converts from Float64x2 to Int64x2 -func (from Float64x2) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Float64x2) AsInt64x2() Int64x2 -// Uint8x16 converts from Float64x2 to Uint8x16 -func (from Float64x2) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Float64x2) AsUint8x16() Uint8x16 -// Uint16x8 converts from Float64x2 to Uint16x8 -func (from Float64x2) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Float64x2) AsUint16x8() Uint16x8 -// Uint32x4 converts from Float64x2 to Uint32x4 -func (from Float64x2) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Float64x2) AsUint32x4() Uint32x4 -// Uint64x2 converts from Float64x2 to Uint64x2 -func (from Float64x2) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Float64x2) AsUint64x2() Uint64x2 -// Float32x8 converts from Float64x4 to Float32x8 -func (from Float64x4) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Float64x4) AsFloat32x8() Float32x8 -// Int8x32 converts from Float64x4 to Int8x32 -func (from Float64x4) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Float64x4) AsInt8x32() Int8x32 -// Int16x16 converts from Float64x4 to Int16x16 -func (from Float64x4) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Float64x4) AsInt16x16() Int16x16 -// Int32x8 converts from Float64x4 to Int32x8 -func (from Float64x4) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Float64x4) AsInt32x8() Int32x8 -// Int64x4 converts from Float64x4 to Int64x4 -func (from Float64x4) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Float64x4) AsInt64x4() Int64x4 -// Uint8x32 converts from Float64x4 to Uint8x32 -func (from Float64x4) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Float64x4) AsUint8x32() Uint8x32 -// Uint16x16 converts from Float64x4 to Uint16x16 -func (from Float64x4) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Float64x4) AsUint16x16() Uint16x16 -// Uint32x8 converts from Float64x4 to Uint32x8 -func (from Float64x4) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Float64x4) AsUint32x8() Uint32x8 -// Uint64x4 converts from Float64x4 to Uint64x4 -func (from Float64x4) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Float64x4) AsUint64x4() Uint64x4 -// Float32x16 converts from Float64x8 to Float32x16 -func (from Float64x8) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Float64x8) AsFloat32x16() Float32x16 -// Int8x64 converts from Float64x8 to Int8x64 -func (from Float64x8) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Float64x8) AsInt8x64() Int8x64 -// Int16x32 converts from Float64x8 to Int16x32 -func (from Float64x8) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Float64x8) AsInt16x32() Int16x32 -// Int32x16 converts from Float64x8 to Int32x16 -func (from Float64x8) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Float64x8) AsInt32x16() Int32x16 -// Int64x8 converts from Float64x8 to Int64x8 -func (from Float64x8) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Float64x8) AsInt64x8() Int64x8 -// Uint8x64 converts from Float64x8 to Uint8x64 -func (from Float64x8) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Float64x8) AsUint8x64() Uint8x64 -// Uint16x32 converts from Float64x8 to Uint16x32 -func (from Float64x8) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Float64x8) AsUint16x32() Uint16x32 -// Uint32x16 converts from Float64x8 to Uint32x16 -func (from Float64x8) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Float64x8) AsUint32x16() Uint32x16 -// Uint64x8 converts from Float64x8 to Uint64x8 -func (from Float64x8) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Float64x8) AsUint64x8() Uint64x8 -// Float32x4 converts from Int8x16 to Float32x4 -func (from Int8x16) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Int8x16) AsFloat32x4() Float32x4 -// Float64x2 converts from Int8x16 to Float64x2 -func (from Int8x16) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Int8x16) AsFloat64x2() Float64x2 -// Int16x8 converts from Int8x16 to Int16x8 -func (from Int8x16) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Int8x16) AsInt16x8() Int16x8 -// Int32x4 converts from Int8x16 to Int32x4 -func (from Int8x16) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Int8x16) AsInt32x4() Int32x4 -// Int64x2 converts from Int8x16 to Int64x2 -func (from Int8x16) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Int8x16) AsInt64x2() Int64x2 -// Uint8x16 converts from Int8x16 to Uint8x16 -func (from Int8x16) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Int8x16) AsUint8x16() Uint8x16 -// Uint16x8 converts from Int8x16 to Uint16x8 -func (from Int8x16) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Int8x16) AsUint16x8() Uint16x8 -// Uint32x4 converts from Int8x16 to Uint32x4 -func (from Int8x16) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Int8x16) AsUint32x4() Uint32x4 -// Uint64x2 converts from Int8x16 to Uint64x2 -func (from Int8x16) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Int8x16) AsUint64x2() Uint64x2 -// Float32x8 converts from Int8x32 to Float32x8 -func (from Int8x32) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Int8x32) AsFloat32x8() Float32x8 -// Float64x4 converts from Int8x32 to Float64x4 -func (from Int8x32) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Int8x32) AsFloat64x4() Float64x4 -// Int16x16 converts from Int8x32 to Int16x16 -func (from Int8x32) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Int8x32) AsInt16x16() Int16x16 -// Int32x8 converts from Int8x32 to Int32x8 -func (from Int8x32) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Int8x32) AsInt32x8() Int32x8 -// Int64x4 converts from Int8x32 to Int64x4 -func (from Int8x32) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Int8x32) AsInt64x4() Int64x4 -// Uint8x32 converts from Int8x32 to Uint8x32 -func (from Int8x32) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Int8x32) AsUint8x32() Uint8x32 -// Uint16x16 converts from Int8x32 to Uint16x16 -func (from Int8x32) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Int8x32) AsUint16x16() Uint16x16 -// Uint32x8 converts from Int8x32 to Uint32x8 -func (from Int8x32) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Int8x32) AsUint32x8() Uint32x8 -// Uint64x4 converts from Int8x32 to Uint64x4 -func (from Int8x32) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Int8x32) AsUint64x4() Uint64x4 -// Float32x16 converts from Int8x64 to Float32x16 -func (from Int8x64) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Int8x64) AsFloat32x16() Float32x16 -// Float64x8 converts from Int8x64 to Float64x8 -func (from Int8x64) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Int8x64) AsFloat64x8() Float64x8 -// Int16x32 converts from Int8x64 to Int16x32 -func (from Int8x64) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Int8x64) AsInt16x32() Int16x32 -// Int32x16 converts from Int8x64 to Int32x16 -func (from Int8x64) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Int8x64) AsInt32x16() Int32x16 -// Int64x8 converts from Int8x64 to Int64x8 -func (from Int8x64) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Int8x64) AsInt64x8() Int64x8 -// Uint8x64 converts from Int8x64 to Uint8x64 -func (from Int8x64) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Int8x64) AsUint8x64() Uint8x64 -// Uint16x32 converts from Int8x64 to Uint16x32 -func (from Int8x64) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Int8x64) AsUint16x32() Uint16x32 -// Uint32x16 converts from Int8x64 to Uint32x16 -func (from Int8x64) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Int8x64) AsUint32x16() Uint32x16 -// Uint64x8 converts from Int8x64 to Uint64x8 -func (from Int8x64) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Int8x64) AsUint64x8() Uint64x8 -// Float32x4 converts from Int16x8 to Float32x4 -func (from Int16x8) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Int16x8) AsFloat32x4() Float32x4 -// Float64x2 converts from Int16x8 to Float64x2 -func (from Int16x8) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Int16x8) AsFloat64x2() Float64x2 -// Int8x16 converts from Int16x8 to Int8x16 -func (from Int16x8) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Int16x8) AsInt8x16() Int8x16 -// Int32x4 converts from Int16x8 to Int32x4 -func (from Int16x8) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Int16x8) AsInt32x4() Int32x4 -// Int64x2 converts from Int16x8 to Int64x2 -func (from Int16x8) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Int16x8) AsInt64x2() Int64x2 -// Uint8x16 converts from Int16x8 to Uint8x16 -func (from Int16x8) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Int16x8) AsUint8x16() Uint8x16 -// Uint16x8 converts from Int16x8 to Uint16x8 -func (from Int16x8) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Int16x8) AsUint16x8() Uint16x8 -// Uint32x4 converts from Int16x8 to Uint32x4 -func (from Int16x8) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Int16x8) AsUint32x4() Uint32x4 -// Uint64x2 converts from Int16x8 to Uint64x2 -func (from Int16x8) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Int16x8) AsUint64x2() Uint64x2 -// Float32x8 converts from Int16x16 to Float32x8 -func (from Int16x16) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Int16x16) AsFloat32x8() Float32x8 -// Float64x4 converts from Int16x16 to Float64x4 -func (from Int16x16) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Int16x16) AsFloat64x4() Float64x4 -// Int8x32 converts from Int16x16 to Int8x32 -func (from Int16x16) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Int16x16) AsInt8x32() Int8x32 -// Int32x8 converts from Int16x16 to Int32x8 -func (from Int16x16) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Int16x16) AsInt32x8() Int32x8 -// Int64x4 converts from Int16x16 to Int64x4 -func (from Int16x16) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Int16x16) AsInt64x4() Int64x4 -// Uint8x32 converts from Int16x16 to Uint8x32 -func (from Int16x16) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Int16x16) AsUint8x32() Uint8x32 -// Uint16x16 converts from Int16x16 to Uint16x16 -func (from Int16x16) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Int16x16) AsUint16x16() Uint16x16 -// Uint32x8 converts from Int16x16 to Uint32x8 -func (from Int16x16) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Int16x16) AsUint32x8() Uint32x8 -// Uint64x4 converts from Int16x16 to Uint64x4 -func (from Int16x16) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Int16x16) AsUint64x4() Uint64x4 -// Float32x16 converts from Int16x32 to Float32x16 -func (from Int16x32) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Int16x32) AsFloat32x16() Float32x16 -// Float64x8 converts from Int16x32 to Float64x8 -func (from Int16x32) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Int16x32) AsFloat64x8() Float64x8 -// Int8x64 converts from Int16x32 to Int8x64 -func (from Int16x32) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Int16x32) AsInt8x64() Int8x64 -// Int32x16 converts from Int16x32 to Int32x16 -func (from Int16x32) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Int16x32) AsInt32x16() Int32x16 -// Int64x8 converts from Int16x32 to Int64x8 -func (from Int16x32) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Int16x32) AsInt64x8() Int64x8 -// Uint8x64 converts from Int16x32 to Uint8x64 -func (from Int16x32) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Int16x32) AsUint8x64() Uint8x64 -// Uint16x32 converts from Int16x32 to Uint16x32 -func (from Int16x32) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Int16x32) AsUint16x32() Uint16x32 -// Uint32x16 converts from Int16x32 to Uint32x16 -func (from Int16x32) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Int16x32) AsUint32x16() Uint32x16 -// Uint64x8 converts from Int16x32 to Uint64x8 -func (from Int16x32) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Int16x32) AsUint64x8() Uint64x8 -// Float32x4 converts from Int32x4 to Float32x4 -func (from Int32x4) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Int32x4) AsFloat32x4() Float32x4 -// Float64x2 converts from Int32x4 to Float64x2 -func (from Int32x4) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Int32x4) AsFloat64x2() Float64x2 -// Int8x16 converts from Int32x4 to Int8x16 -func (from Int32x4) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Int32x4) AsInt8x16() Int8x16 -// Int16x8 converts from Int32x4 to Int16x8 -func (from Int32x4) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Int32x4) AsInt16x8() Int16x8 -// Int64x2 converts from Int32x4 to Int64x2 -func (from Int32x4) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Int32x4) AsInt64x2() Int64x2 -// Uint8x16 converts from Int32x4 to Uint8x16 -func (from Int32x4) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Int32x4) AsUint8x16() Uint8x16 -// Uint16x8 converts from Int32x4 to Uint16x8 -func (from Int32x4) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Int32x4) AsUint16x8() Uint16x8 -// Uint32x4 converts from Int32x4 to Uint32x4 -func (from Int32x4) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Int32x4) AsUint32x4() Uint32x4 -// Uint64x2 converts from Int32x4 to Uint64x2 -func (from Int32x4) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Int32x4) AsUint64x2() Uint64x2 -// Float32x8 converts from Int32x8 to Float32x8 -func (from Int32x8) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Int32x8) AsFloat32x8() Float32x8 -// Float64x4 converts from Int32x8 to Float64x4 -func (from Int32x8) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Int32x8) AsFloat64x4() Float64x4 -// Int8x32 converts from Int32x8 to Int8x32 -func (from Int32x8) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Int32x8) AsInt8x32() Int8x32 -// Int16x16 converts from Int32x8 to Int16x16 -func (from Int32x8) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Int32x8) AsInt16x16() Int16x16 -// Int64x4 converts from Int32x8 to Int64x4 -func (from Int32x8) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Int32x8) AsInt64x4() Int64x4 -// Uint8x32 converts from Int32x8 to Uint8x32 -func (from Int32x8) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Int32x8) AsUint8x32() Uint8x32 -// Uint16x16 converts from Int32x8 to Uint16x16 -func (from Int32x8) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Int32x8) AsUint16x16() Uint16x16 -// Uint32x8 converts from Int32x8 to Uint32x8 -func (from Int32x8) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Int32x8) AsUint32x8() Uint32x8 -// Uint64x4 converts from Int32x8 to Uint64x4 -func (from Int32x8) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Int32x8) AsUint64x4() Uint64x4 -// Float32x16 converts from Int32x16 to Float32x16 -func (from Int32x16) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Int32x16) AsFloat32x16() Float32x16 -// Float64x8 converts from Int32x16 to Float64x8 -func (from Int32x16) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Int32x16) AsFloat64x8() Float64x8 -// Int8x64 converts from Int32x16 to Int8x64 -func (from Int32x16) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Int32x16) AsInt8x64() Int8x64 -// Int16x32 converts from Int32x16 to Int16x32 -func (from Int32x16) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Int32x16) AsInt16x32() Int16x32 -// Int64x8 converts from Int32x16 to Int64x8 -func (from Int32x16) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Int32x16) AsInt64x8() Int64x8 -// Uint8x64 converts from Int32x16 to Uint8x64 -func (from Int32x16) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Int32x16) AsUint8x64() Uint8x64 -// Uint16x32 converts from Int32x16 to Uint16x32 -func (from Int32x16) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Int32x16) AsUint16x32() Uint16x32 -// Uint32x16 converts from Int32x16 to Uint32x16 -func (from Int32x16) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Int32x16) AsUint32x16() Uint32x16 -// Uint64x8 converts from Int32x16 to Uint64x8 -func (from Int32x16) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Int32x16) AsUint64x8() Uint64x8 -// Float32x4 converts from Int64x2 to Float32x4 -func (from Int64x2) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Int64x2) AsFloat32x4() Float32x4 -// Float64x2 converts from Int64x2 to Float64x2 -func (from Int64x2) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Int64x2) AsFloat64x2() Float64x2 -// Int8x16 converts from Int64x2 to Int8x16 -func (from Int64x2) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Int64x2) AsInt8x16() Int8x16 -// Int16x8 converts from Int64x2 to Int16x8 -func (from Int64x2) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Int64x2) AsInt16x8() Int16x8 -// Int32x4 converts from Int64x2 to Int32x4 -func (from Int64x2) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Int64x2) AsInt32x4() Int32x4 -// Uint8x16 converts from Int64x2 to Uint8x16 -func (from Int64x2) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Int64x2) AsUint8x16() Uint8x16 -// Uint16x8 converts from Int64x2 to Uint16x8 -func (from Int64x2) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Int64x2) AsUint16x8() Uint16x8 -// Uint32x4 converts from Int64x2 to Uint32x4 -func (from Int64x2) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Int64x2) AsUint32x4() Uint32x4 -// Uint64x2 converts from Int64x2 to Uint64x2 -func (from Int64x2) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Int64x2) AsUint64x2() Uint64x2 -// Float32x8 converts from Int64x4 to Float32x8 -func (from Int64x4) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Int64x4) AsFloat32x8() Float32x8 -// Float64x4 converts from Int64x4 to Float64x4 -func (from Int64x4) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Int64x4) AsFloat64x4() Float64x4 -// Int8x32 converts from Int64x4 to Int8x32 -func (from Int64x4) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Int64x4) AsInt8x32() Int8x32 -// Int16x16 converts from Int64x4 to Int16x16 -func (from Int64x4) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Int64x4) AsInt16x16() Int16x16 -// Int32x8 converts from Int64x4 to Int32x8 -func (from Int64x4) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Int64x4) AsInt32x8() Int32x8 -// Uint8x32 converts from Int64x4 to Uint8x32 -func (from Int64x4) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Int64x4) AsUint8x32() Uint8x32 -// Uint16x16 converts from Int64x4 to Uint16x16 -func (from Int64x4) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Int64x4) AsUint16x16() Uint16x16 -// Uint32x8 converts from Int64x4 to Uint32x8 -func (from Int64x4) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Int64x4) AsUint32x8() Uint32x8 -// Uint64x4 converts from Int64x4 to Uint64x4 -func (from Int64x4) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Int64x4) AsUint64x4() Uint64x4 -// Float32x16 converts from Int64x8 to Float32x16 -func (from Int64x8) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Int64x8) AsFloat32x16() Float32x16 -// Float64x8 converts from Int64x8 to Float64x8 -func (from Int64x8) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Int64x8) AsFloat64x8() Float64x8 -// Int8x64 converts from Int64x8 to Int8x64 -func (from Int64x8) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Int64x8) AsInt8x64() Int8x64 -// Int16x32 converts from Int64x8 to Int16x32 -func (from Int64x8) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Int64x8) AsInt16x32() Int16x32 -// Int32x16 converts from Int64x8 to Int32x16 -func (from Int64x8) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Int64x8) AsInt32x16() Int32x16 -// Uint8x64 converts from Int64x8 to Uint8x64 -func (from Int64x8) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Int64x8) AsUint8x64() Uint8x64 -// Uint16x32 converts from Int64x8 to Uint16x32 -func (from Int64x8) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Int64x8) AsUint16x32() Uint16x32 -// Uint32x16 converts from Int64x8 to Uint32x16 -func (from Int64x8) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Int64x8) AsUint32x16() Uint32x16 -// Uint64x8 converts from Int64x8 to Uint64x8 -func (from Int64x8) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Int64x8) AsUint64x8() Uint64x8 -// Float32x4 converts from Uint8x16 to Float32x4 -func (from Uint8x16) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Uint8x16) AsFloat32x4() Float32x4 -// Float64x2 converts from Uint8x16 to Float64x2 -func (from Uint8x16) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Uint8x16) AsFloat64x2() Float64x2 -// Int8x16 converts from Uint8x16 to Int8x16 -func (from Uint8x16) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Uint8x16) AsInt8x16() Int8x16 -// Int16x8 converts from Uint8x16 to Int16x8 -func (from Uint8x16) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Uint8x16) AsInt16x8() Int16x8 -// Int32x4 converts from Uint8x16 to Int32x4 -func (from Uint8x16) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Uint8x16) AsInt32x4() Int32x4 -// Int64x2 converts from Uint8x16 to Int64x2 -func (from Uint8x16) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Uint8x16) AsInt64x2() Int64x2 -// Uint16x8 converts from Uint8x16 to Uint16x8 -func (from Uint8x16) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. +func (x Uint8x16) AsUint16x8() Uint16x8 -// Uint32x4 converts from Uint8x16 to Uint32x4 -func (from Uint8x16) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Uint8x16) AsUint32x4() Uint32x4 -// Uint64x2 converts from Uint8x16 to Uint64x2 -func (from Uint8x16) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Uint8x16) AsUint64x2() Uint64x2 -// Float32x8 converts from Uint8x32 to Float32x8 -func (from Uint8x32) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Uint8x32) AsFloat32x8() Float32x8 -// Float64x4 converts from Uint8x32 to Float64x4 -func (from Uint8x32) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Uint8x32) AsFloat64x4() Float64x4 -// Int8x32 converts from Uint8x32 to Int8x32 -func (from Uint8x32) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Uint8x32) AsInt8x32() Int8x32 -// Int16x16 converts from Uint8x32 to Int16x16 -func (from Uint8x32) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Uint8x32) AsInt16x16() Int16x16 -// Int32x8 converts from Uint8x32 to Int32x8 -func (from Uint8x32) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Uint8x32) AsInt32x8() Int32x8 -// Int64x4 converts from Uint8x32 to Int64x4 -func (from Uint8x32) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Uint8x32) AsInt64x4() Int64x4 -// Uint16x16 converts from Uint8x32 to Uint16x16 -func (from Uint8x32) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. +func (x Uint8x32) AsUint16x16() Uint16x16 -// Uint32x8 converts from Uint8x32 to Uint32x8 -func (from Uint8x32) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Uint8x32) AsUint32x8() Uint32x8 -// Uint64x4 converts from Uint8x32 to Uint64x4 -func (from Uint8x32) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Uint8x32) AsUint64x4() Uint64x4 -// Float32x16 converts from Uint8x64 to Float32x16 -func (from Uint8x64) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Uint8x64) AsFloat32x16() Float32x16 -// Float64x8 converts from Uint8x64 to Float64x8 -func (from Uint8x64) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Uint8x64) AsFloat64x8() Float64x8 -// Int8x64 converts from Uint8x64 to Int8x64 -func (from Uint8x64) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Uint8x64) AsInt8x64() Int8x64 -// Int16x32 converts from Uint8x64 to Int16x32 -func (from Uint8x64) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Uint8x64) AsInt16x32() Int16x32 -// Int32x16 converts from Uint8x64 to Int32x16 -func (from Uint8x64) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Uint8x64) AsInt32x16() Int32x16 -// Int64x8 converts from Uint8x64 to Int64x8 -func (from Uint8x64) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Uint8x64) AsInt64x8() Int64x8 -// Uint16x32 converts from Uint8x64 to Uint16x32 -func (from Uint8x64) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. +func (x Uint8x64) AsUint16x32() Uint16x32 -// Uint32x16 converts from Uint8x64 to Uint32x16 -func (from Uint8x64) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Uint8x64) AsUint32x16() Uint32x16 -// Uint64x8 converts from Uint8x64 to Uint64x8 -func (from Uint8x64) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Uint8x64) AsUint64x8() Uint64x8 -// Float32x4 converts from Uint16x8 to Float32x4 -func (from Uint16x8) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Uint16x8) AsFloat32x4() Float32x4 -// Float64x2 converts from Uint16x8 to Float64x2 -func (from Uint16x8) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Uint16x8) AsFloat64x2() Float64x2 -// Int8x16 converts from Uint16x8 to Int8x16 -func (from Uint16x8) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Uint16x8) AsInt8x16() Int8x16 -// Int16x8 converts from Uint16x8 to Int16x8 -func (from Uint16x8) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Uint16x8) AsInt16x8() Int16x8 -// Int32x4 converts from Uint16x8 to Int32x4 -func (from Uint16x8) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Uint16x8) AsInt32x4() Int32x4 -// Int64x2 converts from Uint16x8 to Int64x2 -func (from Uint16x8) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Uint16x8) AsInt64x2() Int64x2 -// Uint8x16 converts from Uint16x8 to Uint8x16 -func (from Uint16x8) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Uint16x8) AsUint8x16() Uint8x16 -// Uint32x4 converts from Uint16x8 to Uint32x4 -func (from Uint16x8) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. 
+func (x Uint16x8) AsUint32x4() Uint32x4 -// Uint64x2 converts from Uint16x8 to Uint64x2 -func (from Uint16x8) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Uint16x8) AsUint64x2() Uint64x2 -// Float32x8 converts from Uint16x16 to Float32x8 -func (from Uint16x16) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Uint16x16) AsFloat32x8() Float32x8 -// Float64x4 converts from Uint16x16 to Float64x4 -func (from Uint16x16) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Uint16x16) AsFloat64x4() Float64x4 -// Int8x32 converts from Uint16x16 to Int8x32 -func (from Uint16x16) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Uint16x16) AsInt8x32() Int8x32 -// Int16x16 converts from Uint16x16 to Int16x16 -func (from Uint16x16) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Uint16x16) AsInt16x16() Int16x16 -// Int32x8 converts from Uint16x16 to Int32x8 -func (from Uint16x16) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Uint16x16) AsInt32x8() Int32x8 -// Int64x4 converts from Uint16x16 to Int64x4 -func (from Uint16x16) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Uint16x16) AsInt64x4() Int64x4 -// Uint8x32 converts from Uint16x16 to Uint8x32 -func (from Uint16x16) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Uint16x16) AsUint8x32() Uint8x32 -// Uint32x8 converts from Uint16x16 to Uint32x8 -func (from Uint16x16) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. 
+func (x Uint16x16) AsUint32x8() Uint32x8 -// Uint64x4 converts from Uint16x16 to Uint64x4 -func (from Uint16x16) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Uint16x16) AsUint64x4() Uint64x4 -// Float32x16 converts from Uint16x32 to Float32x16 -func (from Uint16x32) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Uint16x32) AsFloat32x16() Float32x16 -// Float64x8 converts from Uint16x32 to Float64x8 -func (from Uint16x32) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Uint16x32) AsFloat64x8() Float64x8 -// Int8x64 converts from Uint16x32 to Int8x64 -func (from Uint16x32) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Uint16x32) AsInt8x64() Int8x64 -// Int16x32 converts from Uint16x32 to Int16x32 -func (from Uint16x32) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Uint16x32) AsInt16x32() Int16x32 -// Int32x16 converts from Uint16x32 to Int32x16 -func (from Uint16x32) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Uint16x32) AsInt32x16() Int32x16 -// Int64x8 converts from Uint16x32 to Int64x8 -func (from Uint16x32) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Uint16x32) AsInt64x8() Int64x8 -// Uint8x64 converts from Uint16x32 to Uint8x64 -func (from Uint16x32) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Uint16x32) AsUint8x64() Uint8x64 -// Uint32x16 converts from Uint16x32 to Uint32x16 -func (from Uint16x32) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. 
+func (x Uint16x32) AsUint32x16() Uint32x16 -// Uint64x8 converts from Uint16x32 to Uint64x8 -func (from Uint16x32) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Uint16x32) AsUint64x8() Uint64x8 -// Float32x4 converts from Uint32x4 to Float32x4 -func (from Uint32x4) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Uint32x4) AsFloat32x4() Float32x4 -// Float64x2 converts from Uint32x4 to Float64x2 -func (from Uint32x4) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Uint32x4) AsFloat64x2() Float64x2 -// Int8x16 converts from Uint32x4 to Int8x16 -func (from Uint32x4) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Uint32x4) AsInt8x16() Int8x16 -// Int16x8 converts from Uint32x4 to Int16x8 -func (from Uint32x4) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Uint32x4) AsInt16x8() Int16x8 -// Int32x4 converts from Uint32x4 to Int32x4 -func (from Uint32x4) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Uint32x4) AsInt32x4() Int32x4 -// Int64x2 converts from Uint32x4 to Int64x2 -func (from Uint32x4) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Uint32x4) AsInt64x2() Int64x2 -// Uint8x16 converts from Uint32x4 to Uint8x16 -func (from Uint32x4) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Uint32x4) AsUint8x16() Uint8x16 -// Uint16x8 converts from Uint32x4 to Uint16x8 -func (from Uint32x4) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. 
+func (x Uint32x4) AsUint16x8() Uint16x8 -// Uint64x2 converts from Uint32x4 to Uint64x2 -func (from Uint32x4) AsUint64x2() (to Uint64x2) +// AsUint64x2 returns a Uint64x2 with the same bit representation as x. +func (x Uint32x4) AsUint64x2() Uint64x2 -// Float32x8 converts from Uint32x8 to Float32x8 -func (from Uint32x8) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Uint32x8) AsFloat32x8() Float32x8 -// Float64x4 converts from Uint32x8 to Float64x4 -func (from Uint32x8) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Uint32x8) AsFloat64x4() Float64x4 -// Int8x32 converts from Uint32x8 to Int8x32 -func (from Uint32x8) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Uint32x8) AsInt8x32() Int8x32 -// Int16x16 converts from Uint32x8 to Int16x16 -func (from Uint32x8) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Uint32x8) AsInt16x16() Int16x16 -// Int32x8 converts from Uint32x8 to Int32x8 -func (from Uint32x8) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Uint32x8) AsInt32x8() Int32x8 -// Int64x4 converts from Uint32x8 to Int64x4 -func (from Uint32x8) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Uint32x8) AsInt64x4() Int64x4 -// Uint8x32 converts from Uint32x8 to Uint8x32 -func (from Uint32x8) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Uint32x8) AsUint8x32() Uint8x32 -// Uint16x16 converts from Uint32x8 to Uint16x16 -func (from Uint32x8) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. 
+func (x Uint32x8) AsUint16x16() Uint16x16 -// Uint64x4 converts from Uint32x8 to Uint64x4 -func (from Uint32x8) AsUint64x4() (to Uint64x4) +// AsUint64x4 returns a Uint64x4 with the same bit representation as x. +func (x Uint32x8) AsUint64x4() Uint64x4 -// Float32x16 converts from Uint32x16 to Float32x16 -func (from Uint32x16) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Uint32x16) AsFloat32x16() Float32x16 -// Float64x8 converts from Uint32x16 to Float64x8 -func (from Uint32x16) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Uint32x16) AsFloat64x8() Float64x8 -// Int8x64 converts from Uint32x16 to Int8x64 -func (from Uint32x16) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Uint32x16) AsInt8x64() Int8x64 -// Int16x32 converts from Uint32x16 to Int16x32 -func (from Uint32x16) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Uint32x16) AsInt16x32() Int16x32 -// Int32x16 converts from Uint32x16 to Int32x16 -func (from Uint32x16) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Uint32x16) AsInt32x16() Int32x16 -// Int64x8 converts from Uint32x16 to Int64x8 -func (from Uint32x16) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Uint32x16) AsInt64x8() Int64x8 -// Uint8x64 converts from Uint32x16 to Uint8x64 -func (from Uint32x16) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Uint32x16) AsUint8x64() Uint8x64 -// Uint16x32 converts from Uint32x16 to Uint16x32 -func (from Uint32x16) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. 
+func (x Uint32x16) AsUint16x32() Uint16x32 -// Uint64x8 converts from Uint32x16 to Uint64x8 -func (from Uint32x16) AsUint64x8() (to Uint64x8) +// AsUint64x8 returns a Uint64x8 with the same bit representation as x. +func (x Uint32x16) AsUint64x8() Uint64x8 -// Float32x4 converts from Uint64x2 to Float32x4 -func (from Uint64x2) AsFloat32x4() (to Float32x4) +// AsFloat32x4 returns a Float32x4 with the same bit representation as x. +func (x Uint64x2) AsFloat32x4() Float32x4 -// Float64x2 converts from Uint64x2 to Float64x2 -func (from Uint64x2) AsFloat64x2() (to Float64x2) +// AsFloat64x2 returns a Float64x2 with the same bit representation as x. +func (x Uint64x2) AsFloat64x2() Float64x2 -// Int8x16 converts from Uint64x2 to Int8x16 -func (from Uint64x2) AsInt8x16() (to Int8x16) +// AsInt8x16 returns an Int8x16 with the same bit representation as x. +func (x Uint64x2) AsInt8x16() Int8x16 -// Int16x8 converts from Uint64x2 to Int16x8 -func (from Uint64x2) AsInt16x8() (to Int16x8) +// AsInt16x8 returns an Int16x8 with the same bit representation as x. +func (x Uint64x2) AsInt16x8() Int16x8 -// Int32x4 converts from Uint64x2 to Int32x4 -func (from Uint64x2) AsInt32x4() (to Int32x4) +// AsInt32x4 returns an Int32x4 with the same bit representation as x. +func (x Uint64x2) AsInt32x4() Int32x4 -// Int64x2 converts from Uint64x2 to Int64x2 -func (from Uint64x2) AsInt64x2() (to Int64x2) +// AsInt64x2 returns an Int64x2 with the same bit representation as x. +func (x Uint64x2) AsInt64x2() Int64x2 -// Uint8x16 converts from Uint64x2 to Uint8x16 -func (from Uint64x2) AsUint8x16() (to Uint8x16) +// AsUint8x16 returns a Uint8x16 with the same bit representation as x. +func (x Uint64x2) AsUint8x16() Uint8x16 -// Uint16x8 converts from Uint64x2 to Uint16x8 -func (from Uint64x2) AsUint16x8() (to Uint16x8) +// AsUint16x8 returns a Uint16x8 with the same bit representation as x. 
+func (x Uint64x2) AsUint16x8() Uint16x8 -// Uint32x4 converts from Uint64x2 to Uint32x4 -func (from Uint64x2) AsUint32x4() (to Uint32x4) +// AsUint32x4 returns a Uint32x4 with the same bit representation as x. +func (x Uint64x2) AsUint32x4() Uint32x4 -// Float32x8 converts from Uint64x4 to Float32x8 -func (from Uint64x4) AsFloat32x8() (to Float32x8) +// AsFloat32x8 returns a Float32x8 with the same bit representation as x. +func (x Uint64x4) AsFloat32x8() Float32x8 -// Float64x4 converts from Uint64x4 to Float64x4 -func (from Uint64x4) AsFloat64x4() (to Float64x4) +// AsFloat64x4 returns a Float64x4 with the same bit representation as x. +func (x Uint64x4) AsFloat64x4() Float64x4 -// Int8x32 converts from Uint64x4 to Int8x32 -func (from Uint64x4) AsInt8x32() (to Int8x32) +// AsInt8x32 returns an Int8x32 with the same bit representation as x. +func (x Uint64x4) AsInt8x32() Int8x32 -// Int16x16 converts from Uint64x4 to Int16x16 -func (from Uint64x4) AsInt16x16() (to Int16x16) +// AsInt16x16 returns an Int16x16 with the same bit representation as x. +func (x Uint64x4) AsInt16x16() Int16x16 -// Int32x8 converts from Uint64x4 to Int32x8 -func (from Uint64x4) AsInt32x8() (to Int32x8) +// AsInt32x8 returns an Int32x8 with the same bit representation as x. +func (x Uint64x4) AsInt32x8() Int32x8 -// Int64x4 converts from Uint64x4 to Int64x4 -func (from Uint64x4) AsInt64x4() (to Int64x4) +// AsInt64x4 returns an Int64x4 with the same bit representation as x. +func (x Uint64x4) AsInt64x4() Int64x4 -// Uint8x32 converts from Uint64x4 to Uint8x32 -func (from Uint64x4) AsUint8x32() (to Uint8x32) +// AsUint8x32 returns a Uint8x32 with the same bit representation as x. +func (x Uint64x4) AsUint8x32() Uint8x32 -// Uint16x16 converts from Uint64x4 to Uint16x16 -func (from Uint64x4) AsUint16x16() (to Uint16x16) +// AsUint16x16 returns a Uint16x16 with the same bit representation as x. 
+func (x Uint64x4) AsUint16x16() Uint16x16 -// Uint32x8 converts from Uint64x4 to Uint32x8 -func (from Uint64x4) AsUint32x8() (to Uint32x8) +// AsUint32x8 returns a Uint32x8 with the same bit representation as x. +func (x Uint64x4) AsUint32x8() Uint32x8 -// Float32x16 converts from Uint64x8 to Float32x16 -func (from Uint64x8) AsFloat32x16() (to Float32x16) +// AsFloat32x16 returns a Float32x16 with the same bit representation as x. +func (x Uint64x8) AsFloat32x16() Float32x16 -// Float64x8 converts from Uint64x8 to Float64x8 -func (from Uint64x8) AsFloat64x8() (to Float64x8) +// AsFloat64x8 returns a Float64x8 with the same bit representation as x. +func (x Uint64x8) AsFloat64x8() Float64x8 -// Int8x64 converts from Uint64x8 to Int8x64 -func (from Uint64x8) AsInt8x64() (to Int8x64) +// AsInt8x64 returns an Int8x64 with the same bit representation as x. +func (x Uint64x8) AsInt8x64() Int8x64 -// Int16x32 converts from Uint64x8 to Int16x32 -func (from Uint64x8) AsInt16x32() (to Int16x32) +// AsInt16x32 returns an Int16x32 with the same bit representation as x. +func (x Uint64x8) AsInt16x32() Int16x32 -// Int32x16 converts from Uint64x8 to Int32x16 -func (from Uint64x8) AsInt32x16() (to Int32x16) +// AsInt32x16 returns an Int32x16 with the same bit representation as x. +func (x Uint64x8) AsInt32x16() Int32x16 -// Int64x8 converts from Uint64x8 to Int64x8 -func (from Uint64x8) AsInt64x8() (to Int64x8) +// AsInt64x8 returns an Int64x8 with the same bit representation as x. +func (x Uint64x8) AsInt64x8() Int64x8 -// Uint8x64 converts from Uint64x8 to Uint8x64 -func (from Uint64x8) AsUint8x64() (to Uint8x64) +// AsUint8x64 returns a Uint8x64 with the same bit representation as x. +func (x Uint64x8) AsUint8x64() Uint8x64 -// Uint16x32 converts from Uint64x8 to Uint16x32 -func (from Uint64x8) AsUint16x32() (to Uint16x32) +// AsUint16x32 returns a Uint16x32 with the same bit representation as x. 
+func (x Uint64x8) AsUint16x32() Uint16x32 -// Uint32x16 converts from Uint64x8 to Uint32x16 -func (from Uint64x8) AsUint32x16() (to Uint32x16) +// AsUint32x16 returns a Uint32x16 with the same bit representation as x. +func (x Uint64x8) AsUint32x16() Uint32x16 -// ToInt8x16 converts from Mask8x16 to Int8x16 +// ToInt8x16 converts from Mask8x16 to Int8x16. func (from Mask8x16) ToInt8x16() (to Int8x16) -// asMask converts from Int8x16 to Mask8x16 +// asMask converts from Int8x16 to Mask8x16. func (from Int8x16) asMask() (to Mask8x16) func (x Mask8x16) And(y Mask8x16) Mask8x16 func (x Mask8x16) Or(y Mask8x16) Mask8x16 -// ToInt8x32 converts from Mask8x32 to Int8x32 +// ToInt8x32 converts from Mask8x32 to Int8x32. func (from Mask8x32) ToInt8x32() (to Int8x32) -// asMask converts from Int8x32 to Mask8x32 +// asMask converts from Int8x32 to Mask8x32. func (from Int8x32) asMask() (to Mask8x32) func (x Mask8x32) And(y Mask8x32) Mask8x32 func (x Mask8x32) Or(y Mask8x32) Mask8x32 -// ToInt8x64 converts from Mask8x64 to Int8x64 +// ToInt8x64 converts from Mask8x64 to Int8x64. func (from Mask8x64) ToInt8x64() (to Int8x64) -// asMask converts from Int8x64 to Mask8x64 +// asMask converts from Int8x64 to Mask8x64. func (from Int8x64) asMask() (to Mask8x64) func (x Mask8x64) And(y Mask8x64) Mask8x64 func (x Mask8x64) Or(y Mask8x64) Mask8x64 -// ToInt16x8 converts from Mask16x8 to Int16x8 +// ToInt16x8 converts from Mask16x8 to Int16x8. func (from Mask16x8) ToInt16x8() (to Int16x8) -// asMask converts from Int16x8 to Mask16x8 +// asMask converts from Int16x8 to Mask16x8. func (from Int16x8) asMask() (to Mask16x8) func (x Mask16x8) And(y Mask16x8) Mask16x8 func (x Mask16x8) Or(y Mask16x8) Mask16x8 -// ToInt16x16 converts from Mask16x16 to Int16x16 +// ToInt16x16 converts from Mask16x16 to Int16x16. func (from Mask16x16) ToInt16x16() (to Int16x16) -// asMask converts from Int16x16 to Mask16x16 +// asMask converts from Int16x16 to Mask16x16. 
func (from Int16x16) asMask() (to Mask16x16) func (x Mask16x16) And(y Mask16x16) Mask16x16 func (x Mask16x16) Or(y Mask16x16) Mask16x16 -// ToInt16x32 converts from Mask16x32 to Int16x32 +// ToInt16x32 converts from Mask16x32 to Int16x32. func (from Mask16x32) ToInt16x32() (to Int16x32) -// asMask converts from Int16x32 to Mask16x32 +// asMask converts from Int16x32 to Mask16x32. func (from Int16x32) asMask() (to Mask16x32) func (x Mask16x32) And(y Mask16x32) Mask16x32 func (x Mask16x32) Or(y Mask16x32) Mask16x32 -// ToInt32x4 converts from Mask32x4 to Int32x4 +// ToInt32x4 converts from Mask32x4 to Int32x4. func (from Mask32x4) ToInt32x4() (to Int32x4) -// asMask converts from Int32x4 to Mask32x4 +// asMask converts from Int32x4 to Mask32x4. func (from Int32x4) asMask() (to Mask32x4) func (x Mask32x4) And(y Mask32x4) Mask32x4 func (x Mask32x4) Or(y Mask32x4) Mask32x4 -// ToInt32x8 converts from Mask32x8 to Int32x8 +// ToInt32x8 converts from Mask32x8 to Int32x8. func (from Mask32x8) ToInt32x8() (to Int32x8) -// asMask converts from Int32x8 to Mask32x8 +// asMask converts from Int32x8 to Mask32x8. func (from Int32x8) asMask() (to Mask32x8) func (x Mask32x8) And(y Mask32x8) Mask32x8 func (x Mask32x8) Or(y Mask32x8) Mask32x8 -// ToInt32x16 converts from Mask32x16 to Int32x16 +// ToInt32x16 converts from Mask32x16 to Int32x16. func (from Mask32x16) ToInt32x16() (to Int32x16) -// asMask converts from Int32x16 to Mask32x16 +// asMask converts from Int32x16 to Mask32x16. func (from Int32x16) asMask() (to Mask32x16) func (x Mask32x16) And(y Mask32x16) Mask32x16 func (x Mask32x16) Or(y Mask32x16) Mask32x16 -// ToInt64x2 converts from Mask64x2 to Int64x2 +// ToInt64x2 converts from Mask64x2 to Int64x2. func (from Mask64x2) ToInt64x2() (to Int64x2) -// asMask converts from Int64x2 to Mask64x2 +// asMask converts from Int64x2 to Mask64x2. 
func (from Int64x2) asMask() (to Mask64x2) func (x Mask64x2) And(y Mask64x2) Mask64x2 func (x Mask64x2) Or(y Mask64x2) Mask64x2 -// ToInt64x4 converts from Mask64x4 to Int64x4 +// ToInt64x4 converts from Mask64x4 to Int64x4. func (from Mask64x4) ToInt64x4() (to Int64x4) -// asMask converts from Int64x4 to Mask64x4 +// asMask converts from Int64x4 to Mask64x4. func (from Int64x4) asMask() (to Mask64x4) func (x Mask64x4) And(y Mask64x4) Mask64x4 func (x Mask64x4) Or(y Mask64x4) Mask64x4 -// ToInt64x8 converts from Mask64x8 to Int64x8 +// ToInt64x8 converts from Mask64x8 to Int64x8. func (from Mask64x8) ToInt64x8() (to Int64x8) -// asMask converts from Int64x8 to Mask64x8 +// asMask converts from Int64x8 to Mask64x8. func (from Int64x8) asMask() (to Mask64x8) func (x Mask64x8) And(y Mask64x8) Mask64x8 diff --git a/src/simd/archsimd/ops_internal_amd64.go b/src/simd/archsimd/ops_internal_amd64.go index 566b88d510..8eae69a7ba 100644 --- a/src/simd/archsimd/ops_internal_amd64.go +++ b/src/simd/archsimd/ops_internal_amd64.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. //go:build goexperiment.simd @@ -382,7 +382,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x /* permuteScalars */ // permuteScalars performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
@@ -391,7 +393,9 @@ func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x func (x Int32x4) permuteScalars(indices uint8) Int32x4 // permuteScalars performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -402,7 +406,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4 /* permuteScalarsGrouped */ // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -412,7 +418,9 @@ func (x Uint32x4) permuteScalars(indices uint8) Uint32x4 func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. 
// @@ -422,7 +430,9 @@ func (x Int32x8) permuteScalarsGrouped(indices uint8) Int32x8 func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -432,7 +442,9 @@ func (x Int32x16) permuteScalarsGrouped(indices uint8) Int32x16 func (x Uint32x8) permuteScalarsGrouped(indices uint8) Uint32x8 // permuteScalarsGrouped performs a grouped permutation of vector x using constant indices: -// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// +// result = {x_group0[indices[0:2]], x_group0[indices[2:4]], x_group0[indices[4:6]], x_group0[indices[6:8]], x_group1[indices[0:2]], ...} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // Each group is of size 128-bit. // @@ -444,7 +456,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16 /* permuteScalarsHi */ // permuteScalarsHi performs a permutation of vector x using constant indices: -// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// +// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
@@ -453,7 +467,9 @@ func (x Uint32x16) permuteScalarsGrouped(indices uint8) Uint32x16 func (x Int16x8) permuteScalarsHi(indices uint8) Int16x8 // permuteScalarsHi performs a permutation of vector x using constant indices: -// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// +// result = {x[0], x[1], x[2], x[3], x[indices[0:2]+4], x[indices[2:4]+4], x[indices[4:6]+4], x[indices[6:8]+4]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -522,7 +538,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32 /* permuteScalarsLo */ // permuteScalarsLo performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. @@ -531,7 +549,9 @@ func (x Uint16x32) permuteScalarsHiGrouped(indices uint8) Uint16x32 func (x Int16x8) permuteScalarsLo(indices uint8) Int16x8 // permuteScalarsLo performs a permutation of vector x using constant indices: -// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// +// result = {x[indices[0:2]], x[indices[2:4]], x[indices[4:6]], x[indices[6:8]], x[4], x[5], x[6], x[7]} +// // Indices is four 2-bit values packed into a byte, thus indices[0:2] is the first index. // // indices results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
diff --git a/src/simd/archsimd/other_gen_amd64.go b/src/simd/archsimd/other_gen_amd64.go index 8d04409197..647001acce 100644 --- a/src/simd/archsimd/other_gen_amd64.go +++ b/src/simd/archsimd/other_gen_amd64.go @@ -1,4 +1,4 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. //go:build goexperiment.simd @@ -7,7 +7,7 @@ package archsimd // BroadcastInt8x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt8x16(x int8) Int8x16 { var z Int8x16 return z.SetElem(0, x).Broadcast128() @@ -16,7 +16,7 @@ func BroadcastInt8x16(x int8) Int8x16 { // BroadcastInt16x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt16x8(x int16) Int16x8 { var z Int16x8 return z.SetElem(0, x).Broadcast128() @@ -25,7 +25,7 @@ func BroadcastInt16x8(x int16) Int16x8 { // BroadcastInt32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt32x4(x int32) Int32x4 { var z Int32x4 return z.SetElem(0, x).Broadcast128() @@ -34,7 +34,7 @@ func BroadcastInt32x4(x int32) Int32x4 { // BroadcastInt64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt64x2(x int64) Int64x2 { var z Int64x2 return z.SetElem(0, x).Broadcast128() @@ -43,7 +43,7 @@ func BroadcastInt64x2(x int64) Int64x2 { // BroadcastUint8x16 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint8x16(x uint8) Uint8x16 { var z Uint8x16 return z.SetElem(0, x).Broadcast128() @@ -52,7 +52,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 { // BroadcastUint16x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint16x8(x uint16) Uint16x8 { var z Uint16x8 return z.SetElem(0, x).Broadcast128() @@ -61,7 +61,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 { // BroadcastUint32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint32x4(x uint32) Uint32x4 { var z Uint32x4 return z.SetElem(0, x).Broadcast128() @@ -70,7 +70,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 { // BroadcastUint64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint64x2(x uint64) Uint64x2 { var z Uint64x2 return z.SetElem(0, x).Broadcast128() @@ -79,7 +79,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 { // BroadcastFloat32x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat32x4(x float32) Float32x4 { var z Float32x4 return z.SetElem(0, x).Broadcast128() @@ -88,7 +88,7 @@ func BroadcastFloat32x4(x float32) Float32x4 { // BroadcastFloat64x2 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat64x2(x float64) Float64x2 { var z Float64x2 return z.SetElem(0, x).Broadcast128() @@ -97,7 +97,7 @@ func BroadcastFloat64x2(x float64) Float64x2 { // BroadcastInt8x32 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt8x32(x int8) Int8x32 { var z Int8x16 return z.SetElem(0, x).Broadcast256() @@ -106,7 +106,7 @@ func BroadcastInt8x32(x int8) Int8x32 { // BroadcastInt16x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt16x16(x int16) Int16x16 { var z Int16x8 return z.SetElem(0, x).Broadcast256() @@ -115,7 +115,7 @@ func BroadcastInt16x16(x int16) Int16x16 { // BroadcastInt32x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt32x8(x int32) Int32x8 { var z Int32x4 return z.SetElem(0, x).Broadcast256() @@ -124,7 +124,7 @@ func BroadcastInt32x8(x int32) Int32x8 { // BroadcastInt64x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastInt64x4(x int64) Int64x4 { var z Int64x2 return z.SetElem(0, x).Broadcast256() @@ -133,7 +133,7 @@ func BroadcastInt64x4(x int64) Int64x4 { // BroadcastUint8x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint8x32(x uint8) Uint8x32 { var z Uint8x16 return z.SetElem(0, x).Broadcast256() @@ -142,7 +142,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 { // BroadcastUint16x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint16x16(x uint16) Uint16x16 { var z Uint16x8 return z.SetElem(0, x).Broadcast256() @@ -151,7 +151,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 { // BroadcastUint32x8 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint32x8(x uint32) Uint32x8 { var z Uint32x4 return z.SetElem(0, x).Broadcast256() @@ -160,7 +160,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 { // BroadcastUint64x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastUint64x4(x uint64) Uint64x4 { var z Uint64x2 return z.SetElem(0, x).Broadcast256() @@ -169,7 +169,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 { // BroadcastFloat32x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat32x8(x float32) Float32x8 { var z Float32x4 return z.SetElem(0, x).Broadcast256() @@ -178,7 +178,7 @@ func BroadcastFloat32x8(x float32) Float32x8 { // BroadcastFloat64x4 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func BroadcastFloat64x4(x float64) Float64x4 { var z Float64x2 return z.SetElem(0, x).Broadcast256() @@ -187,7 +187,7 @@ func BroadcastFloat64x4(x float64) Float64x4 { // BroadcastInt8x64 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastInt8x64(x int8) Int8x64 { var z Int8x16 return z.SetElem(0, x).Broadcast512() @@ -196,7 +196,7 @@ func BroadcastInt8x64(x int8) Int8x64 { // BroadcastInt16x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastInt16x32(x int16) Int16x32 { var z Int16x8 return z.SetElem(0, x).Broadcast512() @@ -205,7 +205,7 @@ func BroadcastInt16x32(x int16) Int16x32 { // BroadcastInt32x16 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastInt32x16(x int32) Int32x16 { var z Int32x4 return z.SetElem(0, x).Broadcast512() @@ -214,7 +214,7 @@ func BroadcastInt32x16(x int32) Int32x16 { // BroadcastInt64x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastInt64x8(x int64) Int64x8 { var z Int64x2 return z.SetElem(0, x).Broadcast512() @@ -223,7 +223,7 @@ func BroadcastInt64x8(x int64) Int64x8 { // BroadcastUint8x64 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastUint8x64(x uint8) Uint8x64 { var z Uint8x16 return z.SetElem(0, x).Broadcast512() @@ -232,7 +232,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 { // BroadcastUint16x32 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512BW +// Emulated, CPU Feature: AVX512BW func BroadcastUint16x32(x uint16) Uint16x32 { var z Uint16x8 return z.SetElem(0, x).Broadcast512() @@ -241,7 +241,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 { // BroadcastUint32x16 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastUint32x16(x uint32) Uint32x16 { var z Uint32x4 return z.SetElem(0, x).Broadcast512() @@ -250,7 +250,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 { // BroadcastUint64x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastUint64x8(x uint64) Uint64x8 { var z Uint64x2 return z.SetElem(0, x).Broadcast512() @@ -259,7 +259,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 { // BroadcastFloat32x16 returns a vector with the input // x assigned to all elements of the output. 
// -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastFloat32x16(x float32) Float32x16 { var z Float32x4 return z.SetElem(0, x).Broadcast512() @@ -268,7 +268,7 @@ func BroadcastFloat32x16(x float32) Float32x16 { // BroadcastFloat64x8 returns a vector with the input // x assigned to all elements of the output. // -// Emulated, CPU Feature AVX512F +// Emulated, CPU Feature: AVX512F func BroadcastFloat64x8(x float64) Float64x8 { var z Float64x2 return z.SetElem(0, x).Broadcast512() @@ -334,378 +334,378 @@ func (from Int64x8) ToMask() (to Mask64x8) { return from.NotEqual(Int64x8{}) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int8x16) Not() Int8x16 { return x.Xor(x.Equal(x).ToInt8x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int16x8) Not() Int16x8 { return x.Xor(x.Equal(x).ToInt16x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int32x4) Not() Int32x4 { return x.Xor(x.Equal(x).ToInt32x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Int64x2) Not() Int64x2 { return x.Xor(x.Equal(x).ToInt64x2()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int8x32) Not() Int8x32 { return x.Xor(x.Equal(x).ToInt8x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int16x16) Not() Int16x16 { return x.Xor(x.Equal(x).ToInt16x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int32x8) Not() Int32x8 { return x.Xor(x.Equal(x).ToInt32x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Int64x4) Not() Int64x4 { return x.Xor(x.Equal(x).ToInt64x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int8x64) Not() Int8x64 { return x.Xor(x.Equal(x).ToInt8x64()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int16x32) Not() Int16x32 { return x.Xor(x.Equal(x).ToInt16x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int32x16) Not() Int32x16 { return x.Xor(x.Equal(x).ToInt32x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Int64x8) Not() Int64x8 { return x.Xor(x.Equal(x).ToInt64x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint8x16) Not() Uint8x16 { return x.Xor(x.Equal(x).ToInt8x16().AsUint8x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint16x8) Not() Uint16x8 { return x.Xor(x.Equal(x).ToInt16x8().AsUint16x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint32x4) Not() Uint32x4 { return x.Xor(x.Equal(x).ToInt32x4().AsUint32x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX +// Emulated, CPU Feature: AVX func (x Uint64x2) Not() Uint64x2 { return x.Xor(x.Equal(x).ToInt64x2().AsUint64x2()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint8x32) Not() Uint8x32 { return x.Xor(x.Equal(x).ToInt8x32().AsUint8x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint16x16) Not() Uint16x16 { return x.Xor(x.Equal(x).ToInt16x16().AsUint16x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint32x8) Not() Uint32x8 { return x.Xor(x.Equal(x).ToInt32x8().AsUint32x8()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX2 +// Emulated, CPU Feature: AVX2 func (x Uint64x4) Not() Uint64x4 { return x.Xor(x.Equal(x).ToInt64x4().AsUint64x4()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint8x64) Not() Uint8x64 { return x.Xor(x.Equal(x).ToInt8x64().AsUint8x64()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. 
// -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint16x32) Not() Uint16x32 { return x.Xor(x.Equal(x).ToInt16x32().AsUint16x32()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint32x16) Not() Uint32x16 { return x.Xor(x.Equal(x).ToInt32x16().AsUint32x16()) } -// Not returns the bitwise complement of x +// Not returns the bitwise complement of x. // -// Emulated, CPU Feature AVX512 +// Emulated, CPU Feature: AVX512 func (x Uint64x8) Not() Uint64x8 { return x.Xor(x.Equal(x).ToInt64x8().AsUint64x8()) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x16) String() string { var s [16]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x8) String() string { var s [8]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x4) String() string { var s [4]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int64x2) String() string { var s [2]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint8x16) String() string { var s [16]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Uint16x8) String() string { var s [8]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x4) String() string { var s [4]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x2) String() string { var s [2]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x4) String() string { var s [4]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x2) String() string { var s [2]float64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x32) String() string { var s [32]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x16) String() string { var s [16]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x8) String() string { var s [8]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int64x4) String() string { var s [4]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Uint8x32) String() string { var s [32]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint16x16) String() string { var s [16]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x8) String() string { var s [8]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x4) String() string { var s [4]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x8) String() string { var s [8]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x4) String() string { var s [4]float64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int8x64) String() string { var s [64]int8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int16x32) String() string { var s [32]int16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Int32x16) String() string { var s [16]int32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. 
func (x Int64x8) String() string { var s [8]int64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint8x64) String() string { var s [64]uint8 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint16x32) String() string { var s [32]uint16 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint32x16) String() string { var s [16]uint32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Uint64x8) String() string { var s [8]uint64 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float32x16) String() string { var s [16]float32 x.Store(&s) return sliceToString(s[:]) } -// String returns a string representation of SIMD vector x +// String returns a string representation of SIMD vector x. func (x Float64x8) String() string { var s [8]float64 x.Store(&s) diff --git a/src/simd/archsimd/shuffles_amd64.go b/src/simd/archsimd/shuffles_amd64.go index 2bbd89c725..355634fcae 100644 --- a/src/simd/archsimd/shuffles_amd64.go +++ b/src/simd/archsimd/shuffles_amd64.go @@ -54,7 +54,10 @@ const ( // requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. 
// // If the selectors are not constant this will translate to a function // call. @@ -133,7 +136,10 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. // // If the selectors are not constant this will translate to a function // call. @@ -205,7 +211,10 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) returns {4,8,25,81} +// +// {1,2,4,8}.SelectFromPair(2,3,5,7,{9,25,49,81}) +// +// returns {4,8,25,81}. // // If the selectors are not constant this will translate to a function // call. @@ -278,9 +287,10 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -353,9 +363,10 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. 
For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -428,9 +439,10 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 { // it requires two. a is the source index of the least element in the // output, and b, c, and d are the indices of the 2nd, 3rd, and 4th // elements in the output. For example, -// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) // -// returns {4,8,25,81,64,128,169,289} +// {1,2,4,8,16,32,64,128}.SelectFromPair(2,3,5,7,{9,25,49,81,121,169,225,289}) +// +// returns {4,8,25,81,64,128,169,289}. // // If the selectors are not constant this will translate to a function // call. @@ -1080,7 +1092,7 @@ func (x Uint32x16) PermuteScalarsGrouped(a, b, c, d uint8) Uint32x16 { // PermuteScalarsHi performs a permutation of vector x using the supplied indices: // -// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} +// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} // // Parameters a,b,c,d should have values between 0 and 3. // If a through d are constants, then an instruction will be inlined, otherwise @@ -1093,7 +1105,7 @@ func (x Int16x8) PermuteScalarsHi(a, b, c, d uint8) Int16x8 { // PermuteScalarsHi performs a permutation of vector x using the supplied indices: // -// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} +// result = {x[0], x[1], x[2], x[3], x[a+4], x[b+4], x[c+4], x[d+4]} // // Parameters a,b,c,d should have values between 0 and 3. 
// If a through d are constants, then an instruction will be inlined, otherwise @@ -1276,7 +1288,8 @@ func (x Uint16x32) PermuteScalarsLoGrouped(a, b, c, d uint8) Uint16x32 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = @@ -1300,7 +1313,8 @@ func (x Uint64x2) CarrylessMultiply(a, b uint8, y Uint64x2) Uint64x2 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = @@ -1324,7 +1338,8 @@ func (x Uint64x4) CarrylessMultiplyGrouped(a, b uint8, y Uint64x4) Uint64x4 { // // A carryless multiplication uses bitwise XOR instead of // add-with-carry, for example (in base two): -// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 +// +// 11 * 11 = 11 * (10 ^ 1) = (11 * 10) ^ (11 * 1) = 110 ^ 11 = 101 // // This also models multiplication of polynomials with coefficients // from GF(2) -- 11 * 11 models (x+1)*(x+1) = x**2 + (1^1)x + 1 = diff --git a/src/simd/archsimd/slice_gen_amd64.go b/src/simd/archsimd/slice_gen_amd64.go index c03e28206d..9e34f9ca56 100644 --- a/src/simd/archsimd/slice_gen_amd64.go +++ b/src/simd/archsimd/slice_gen_amd64.go @@ -1,4 +1,4 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. 
//go:build goexperiment.simd @@ -6,302 +6,302 @@ package archsimd import "unsafe" -// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s +// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s. func LoadInt8x16Slice(s []int8) Int8x16 { return LoadInt8x16((*[16]int8)(s)) } -// StoreSlice stores x into a slice of at least 16 int8s +// StoreSlice stores x into a slice of at least 16 int8s. func (x Int8x16) StoreSlice(s []int8) { x.Store((*[16]int8)(s)) } -// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s +// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s. func LoadInt16x8Slice(s []int16) Int16x8 { return LoadInt16x8((*[8]int16)(s)) } -// StoreSlice stores x into a slice of at least 8 int16s +// StoreSlice stores x into a slice of at least 8 int16s. func (x Int16x8) StoreSlice(s []int16) { x.Store((*[8]int16)(s)) } -// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s +// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s. func LoadInt32x4Slice(s []int32) Int32x4 { return LoadInt32x4((*[4]int32)(s)) } -// StoreSlice stores x into a slice of at least 4 int32s +// StoreSlice stores x into a slice of at least 4 int32s. func (x Int32x4) StoreSlice(s []int32) { x.Store((*[4]int32)(s)) } -// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s +// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s. func LoadInt64x2Slice(s []int64) Int64x2 { return LoadInt64x2((*[2]int64)(s)) } -// StoreSlice stores x into a slice of at least 2 int64s +// StoreSlice stores x into a slice of at least 2 int64s. func (x Int64x2) StoreSlice(s []int64) { x.Store((*[2]int64)(s)) } -// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s +// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s. 
func LoadUint8x16Slice(s []uint8) Uint8x16 { return LoadUint8x16((*[16]uint8)(s)) } -// StoreSlice stores x into a slice of at least 16 uint8s +// StoreSlice stores x into a slice of at least 16 uint8s. func (x Uint8x16) StoreSlice(s []uint8) { x.Store((*[16]uint8)(s)) } -// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s +// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s. func LoadUint16x8Slice(s []uint16) Uint16x8 { return LoadUint16x8((*[8]uint16)(s)) } -// StoreSlice stores x into a slice of at least 8 uint16s +// StoreSlice stores x into a slice of at least 8 uint16s. func (x Uint16x8) StoreSlice(s []uint16) { x.Store((*[8]uint16)(s)) } -// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s +// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s. func LoadUint32x4Slice(s []uint32) Uint32x4 { return LoadUint32x4((*[4]uint32)(s)) } -// StoreSlice stores x into a slice of at least 4 uint32s +// StoreSlice stores x into a slice of at least 4 uint32s. func (x Uint32x4) StoreSlice(s []uint32) { x.Store((*[4]uint32)(s)) } -// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s +// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s. func LoadUint64x2Slice(s []uint64) Uint64x2 { return LoadUint64x2((*[2]uint64)(s)) } -// StoreSlice stores x into a slice of at least 2 uint64s +// StoreSlice stores x into a slice of at least 2 uint64s. func (x Uint64x2) StoreSlice(s []uint64) { x.Store((*[2]uint64)(s)) } -// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s +// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s. func LoadFloat32x4Slice(s []float32) Float32x4 { return LoadFloat32x4((*[4]float32)(s)) } -// StoreSlice stores x into a slice of at least 4 float32s +// StoreSlice stores x into a slice of at least 4 float32s. 
func (x Float32x4) StoreSlice(s []float32) { x.Store((*[4]float32)(s)) } -// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s +// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s. func LoadFloat64x2Slice(s []float64) Float64x2 { return LoadFloat64x2((*[2]float64)(s)) } -// StoreSlice stores x into a slice of at least 2 float64s +// StoreSlice stores x into a slice of at least 2 float64s. func (x Float64x2) StoreSlice(s []float64) { x.Store((*[2]float64)(s)) } -// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s +// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s. func LoadInt8x32Slice(s []int8) Int8x32 { return LoadInt8x32((*[32]int8)(s)) } -// StoreSlice stores x into a slice of at least 32 int8s +// StoreSlice stores x into a slice of at least 32 int8s. func (x Int8x32) StoreSlice(s []int8) { x.Store((*[32]int8)(s)) } -// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s +// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s. func LoadInt16x16Slice(s []int16) Int16x16 { return LoadInt16x16((*[16]int16)(s)) } -// StoreSlice stores x into a slice of at least 16 int16s +// StoreSlice stores x into a slice of at least 16 int16s. func (x Int16x16) StoreSlice(s []int16) { x.Store((*[16]int16)(s)) } -// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s +// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s. func LoadInt32x8Slice(s []int32) Int32x8 { return LoadInt32x8((*[8]int32)(s)) } -// StoreSlice stores x into a slice of at least 8 int32s +// StoreSlice stores x into a slice of at least 8 int32s. func (x Int32x8) StoreSlice(s []int32) { x.Store((*[8]int32)(s)) } -// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s +// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s. 
func LoadInt64x4Slice(s []int64) Int64x4 { return LoadInt64x4((*[4]int64)(s)) } -// StoreSlice stores x into a slice of at least 4 int64s +// StoreSlice stores x into a slice of at least 4 int64s. func (x Int64x4) StoreSlice(s []int64) { x.Store((*[4]int64)(s)) } -// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s +// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s. func LoadUint8x32Slice(s []uint8) Uint8x32 { return LoadUint8x32((*[32]uint8)(s)) } -// StoreSlice stores x into a slice of at least 32 uint8s +// StoreSlice stores x into a slice of at least 32 uint8s. func (x Uint8x32) StoreSlice(s []uint8) { x.Store((*[32]uint8)(s)) } -// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s +// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s. func LoadUint16x16Slice(s []uint16) Uint16x16 { return LoadUint16x16((*[16]uint16)(s)) } -// StoreSlice stores x into a slice of at least 16 uint16s +// StoreSlice stores x into a slice of at least 16 uint16s. func (x Uint16x16) StoreSlice(s []uint16) { x.Store((*[16]uint16)(s)) } -// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s +// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s. func LoadUint32x8Slice(s []uint32) Uint32x8 { return LoadUint32x8((*[8]uint32)(s)) } -// StoreSlice stores x into a slice of at least 8 uint32s +// StoreSlice stores x into a slice of at least 8 uint32s. func (x Uint32x8) StoreSlice(s []uint32) { x.Store((*[8]uint32)(s)) } -// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s +// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s. func LoadUint64x4Slice(s []uint64) Uint64x4 { return LoadUint64x4((*[4]uint64)(s)) } -// StoreSlice stores x into a slice of at least 4 uint64s +// StoreSlice stores x into a slice of at least 4 uint64s. 
func (x Uint64x4) StoreSlice(s []uint64) { x.Store((*[4]uint64)(s)) } -// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s +// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s. func LoadFloat32x8Slice(s []float32) Float32x8 { return LoadFloat32x8((*[8]float32)(s)) } -// StoreSlice stores x into a slice of at least 8 float32s +// StoreSlice stores x into a slice of at least 8 float32s. func (x Float32x8) StoreSlice(s []float32) { x.Store((*[8]float32)(s)) } -// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s +// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s. func LoadFloat64x4Slice(s []float64) Float64x4 { return LoadFloat64x4((*[4]float64)(s)) } -// StoreSlice stores x into a slice of at least 4 float64s +// StoreSlice stores x into a slice of at least 4 float64s. func (x Float64x4) StoreSlice(s []float64) { x.Store((*[4]float64)(s)) } -// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s +// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s. func LoadInt8x64Slice(s []int8) Int8x64 { return LoadInt8x64((*[64]int8)(s)) } -// StoreSlice stores x into a slice of at least 64 int8s +// StoreSlice stores x into a slice of at least 64 int8s. func (x Int8x64) StoreSlice(s []int8) { x.Store((*[64]int8)(s)) } -// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s +// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s. func LoadInt16x32Slice(s []int16) Int16x32 { return LoadInt16x32((*[32]int16)(s)) } -// StoreSlice stores x into a slice of at least 32 int16s +// StoreSlice stores x into a slice of at least 32 int16s. func (x Int16x32) StoreSlice(s []int16) { x.Store((*[32]int16)(s)) } -// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s +// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s. 
func LoadInt32x16Slice(s []int32) Int32x16 { return LoadInt32x16((*[16]int32)(s)) } -// StoreSlice stores x into a slice of at least 16 int32s +// StoreSlice stores x into a slice of at least 16 int32s. func (x Int32x16) StoreSlice(s []int32) { x.Store((*[16]int32)(s)) } -// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s +// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s. func LoadInt64x8Slice(s []int64) Int64x8 { return LoadInt64x8((*[8]int64)(s)) } -// StoreSlice stores x into a slice of at least 8 int64s +// StoreSlice stores x into a slice of at least 8 int64s. func (x Int64x8) StoreSlice(s []int64) { x.Store((*[8]int64)(s)) } -// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s +// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s. func LoadUint8x64Slice(s []uint8) Uint8x64 { return LoadUint8x64((*[64]uint8)(s)) } -// StoreSlice stores x into a slice of at least 64 uint8s +// StoreSlice stores x into a slice of at least 64 uint8s. func (x Uint8x64) StoreSlice(s []uint8) { x.Store((*[64]uint8)(s)) } -// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s +// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s. func LoadUint16x32Slice(s []uint16) Uint16x32 { return LoadUint16x32((*[32]uint16)(s)) } -// StoreSlice stores x into a slice of at least 32 uint16s +// StoreSlice stores x into a slice of at least 32 uint16s. func (x Uint16x32) StoreSlice(s []uint16) { x.Store((*[32]uint16)(s)) } -// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s +// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s. func LoadUint32x16Slice(s []uint32) Uint32x16 { return LoadUint32x16((*[16]uint32)(s)) } -// StoreSlice stores x into a slice of at least 16 uint32s +// StoreSlice stores x into a slice of at least 16 uint32s. 
func (x Uint32x16) StoreSlice(s []uint32) { x.Store((*[16]uint32)(s)) } -// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s +// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s. func LoadUint64x8Slice(s []uint64) Uint64x8 { return LoadUint64x8((*[8]uint64)(s)) } -// StoreSlice stores x into a slice of at least 8 uint64s +// StoreSlice stores x into a slice of at least 8 uint64s. func (x Uint64x8) StoreSlice(s []uint64) { x.Store((*[8]uint64)(s)) } -// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s +// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s. func LoadFloat32x16Slice(s []float32) Float32x16 { return LoadFloat32x16((*[16]float32)(s)) } -// StoreSlice stores x into a slice of at least 16 float32s +// StoreSlice stores x into a slice of at least 16 float32s. func (x Float32x16) StoreSlice(s []float32) { x.Store((*[16]float32)(s)) } -// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s +// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s. func LoadFloat64x8Slice(s []float64) Float64x8 { return LoadFloat64x8((*[8]float64)(s)) } -// StoreSlice stores x into a slice of at least 8 float64s +// StoreSlice stores x into a slice of at least 8 float64s. func (x Float64x8) StoreSlice(s []float64) { x.Store((*[8]float64)(s)) } diff --git a/src/simd/archsimd/types_amd64.go b/src/simd/archsimd/types_amd64.go index 556383b380..3d0a49dc09 100644 --- a/src/simd/archsimd/types_amd64.go +++ b/src/simd/archsimd/types_amd64.go @@ -1,4 +1,4 @@ -// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. +// Code generated by 'simdgen -o godefs -goroot $GOROOT -xedPath $XED_PATH go.yaml types.yaml categories.yaml'; DO NOT EDIT. 
//go:build goexperiment.simd @@ -9,27 +9,27 @@ type v128 struct { _128 [0]func() // uncomparable } -// Float32x4 is a 128-bit SIMD vector of 4 float32 +// Float32x4 is a 128-bit SIMD vector of 4 float32s. type Float32x4 struct { float32x4 v128 vals [4]float32 } -// Len returns the number of elements in a Float32x4 +// Len returns the number of elements in a Float32x4. func (x Float32x4) Len() int { return 4 } -// LoadFloat32x4 loads a Float32x4 from an array +// LoadFloat32x4 loads a Float32x4 from an array. // //go:noescape func LoadFloat32x4(y *[4]float32) Float32x4 -// Store stores a Float32x4 to an array +// Store stores a Float32x4 to an array. // //go:noescape func (x Float32x4) Store(y *[4]float32) // LoadMaskedFloat32x4 loads a Float32x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -37,34 +37,34 @@ func (x Float32x4) Store(y *[4]float32) func LoadMaskedFloat32x4(y *[4]float32, mask Mask32x4) Float32x4 // StoreMasked stores a Float32x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Float32x4) StoreMasked(y *[4]float32, mask Mask32x4) -// Float64x2 is a 128-bit SIMD vector of 2 float64 +// Float64x2 is a 128-bit SIMD vector of 2 float64s. type Float64x2 struct { float64x2 v128 vals [2]float64 } -// Len returns the number of elements in a Float64x2 +// Len returns the number of elements in a Float64x2. func (x Float64x2) Len() int { return 2 } -// LoadFloat64x2 loads a Float64x2 from an array +// LoadFloat64x2 loads a Float64x2 from an array. // //go:noescape func LoadFloat64x2(y *[2]float64) Float64x2 -// Store stores a Float64x2 to an array +// Store stores a Float64x2 to an array. 
// //go:noescape func (x Float64x2) Store(y *[2]float64) // LoadMaskedFloat64x2 loads a Float64x2 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -72,180 +72,180 @@ func (x Float64x2) Store(y *[2]float64) func LoadMaskedFloat64x2(y *[2]float64, mask Mask64x2) Float64x2 // StoreMasked stores a Float64x2 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Float64x2) StoreMasked(y *[2]float64, mask Mask64x2) -// Int8x16 is a 128-bit SIMD vector of 16 int8 +// Int8x16 is a 128-bit SIMD vector of 16 int8s. type Int8x16 struct { int8x16 v128 vals [16]int8 } -// Len returns the number of elements in a Int8x16 +// Len returns the number of elements in an Int8x16. func (x Int8x16) Len() int { return 16 } -// LoadInt8x16 loads a Int8x16 from an array +// LoadInt8x16 loads an Int8x16 from an array. // //go:noescape func LoadInt8x16(y *[16]int8) Int8x16 -// Store stores a Int8x16 to an array +// Store stores an Int8x16 to an array. // //go:noescape func (x Int8x16) Store(y *[16]int8) -// Int16x8 is a 128-bit SIMD vector of 8 int16 +// Int16x8 is a 128-bit SIMD vector of 8 int16s. type Int16x8 struct { int16x8 v128 vals [8]int16 } -// Len returns the number of elements in a Int16x8 +// Len returns the number of elements in an Int16x8. func (x Int16x8) Len() int { return 8 } -// LoadInt16x8 loads a Int16x8 from an array +// LoadInt16x8 loads an Int16x8 from an array. // //go:noescape func LoadInt16x8(y *[8]int16) Int16x8 -// Store stores a Int16x8 to an array +// Store stores an Int16x8 to an array. // //go:noescape func (x Int16x8) Store(y *[8]int16) -// Int32x4 is a 128-bit SIMD vector of 4 int32 +// Int32x4 is a 128-bit SIMD vector of 4 int32s. 
type Int32x4 struct { int32x4 v128 vals [4]int32 } -// Len returns the number of elements in a Int32x4 +// Len returns the number of elements in an Int32x4. func (x Int32x4) Len() int { return 4 } -// LoadInt32x4 loads a Int32x4 from an array +// LoadInt32x4 loads an Int32x4 from an array. // //go:noescape func LoadInt32x4(y *[4]int32) Int32x4 -// Store stores a Int32x4 to an array +// Store stores an Int32x4 to an array. // //go:noescape func (x Int32x4) Store(y *[4]int32) -// LoadMaskedInt32x4 loads a Int32x4 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x4 loads an Int32x4 from an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt32x4(y *[4]int32, mask Mask32x4) Int32x4 -// StoreMasked stores a Int32x4 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x4 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Int32x4) StoreMasked(y *[4]int32, mask Mask32x4) -// Int64x2 is a 128-bit SIMD vector of 2 int64 +// Int64x2 is a 128-bit SIMD vector of 2 int64s. type Int64x2 struct { int64x2 v128 vals [2]int64 } -// Len returns the number of elements in a Int64x2 +// Len returns the number of elements in an Int64x2. func (x Int64x2) Len() int { return 2 } -// LoadInt64x2 loads a Int64x2 from an array +// LoadInt64x2 loads an Int64x2 from an array. // //go:noescape func LoadInt64x2(y *[2]int64) Int64x2 -// Store stores a Int64x2 to an array +// Store stores an Int64x2 to an array. // //go:noescape func (x Int64x2) Store(y *[2]int64) -// LoadMaskedInt64x2 loads a Int64x2 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x2 loads an Int64x2 from an array, +// at those elements enabled by mask. 
// // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt64x2(y *[2]int64, mask Mask64x2) Int64x2 -// StoreMasked stores a Int64x2 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x2 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Int64x2) StoreMasked(y *[2]int64, mask Mask64x2) -// Uint8x16 is a 128-bit SIMD vector of 16 uint8 +// Uint8x16 is a 128-bit SIMD vector of 16 uint8s. type Uint8x16 struct { uint8x16 v128 vals [16]uint8 } -// Len returns the number of elements in a Uint8x16 +// Len returns the number of elements in a Uint8x16. func (x Uint8x16) Len() int { return 16 } -// LoadUint8x16 loads a Uint8x16 from an array +// LoadUint8x16 loads a Uint8x16 from an array. // //go:noescape func LoadUint8x16(y *[16]uint8) Uint8x16 -// Store stores a Uint8x16 to an array +// Store stores a Uint8x16 to an array. // //go:noescape func (x Uint8x16) Store(y *[16]uint8) -// Uint16x8 is a 128-bit SIMD vector of 8 uint16 +// Uint16x8 is a 128-bit SIMD vector of 8 uint16s. type Uint16x8 struct { uint16x8 v128 vals [8]uint16 } -// Len returns the number of elements in a Uint16x8 +// Len returns the number of elements in a Uint16x8. func (x Uint16x8) Len() int { return 8 } -// LoadUint16x8 loads a Uint16x8 from an array +// LoadUint16x8 loads a Uint16x8 from an array. // //go:noescape func LoadUint16x8(y *[8]uint16) Uint16x8 -// Store stores a Uint16x8 to an array +// Store stores a Uint16x8 to an array. // //go:noescape func (x Uint16x8) Store(y *[8]uint16) -// Uint32x4 is a 128-bit SIMD vector of 4 uint32 +// Uint32x4 is a 128-bit SIMD vector of 4 uint32s. type Uint32x4 struct { uint32x4 v128 vals [4]uint32 } -// Len returns the number of elements in a Uint32x4 +// Len returns the number of elements in a Uint32x4. func (x Uint32x4) Len() int { return 4 } -// LoadUint32x4 loads a Uint32x4 from an array +// LoadUint32x4 loads a Uint32x4 from an array. 
// //go:noescape func LoadUint32x4(y *[4]uint32) Uint32x4 -// Store stores a Uint32x4 to an array +// Store stores a Uint32x4 to an array. // //go:noescape func (x Uint32x4) Store(y *[4]uint32) // LoadMaskedUint32x4 loads a Uint32x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -253,34 +253,34 @@ func (x Uint32x4) Store(y *[4]uint32) func LoadMaskedUint32x4(y *[4]uint32, mask Mask32x4) Uint32x4 // StoreMasked stores a Uint32x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Uint32x4) StoreMasked(y *[4]uint32, mask Mask32x4) -// Uint64x2 is a 128-bit SIMD vector of 2 uint64 +// Uint64x2 is a 128-bit SIMD vector of 2 uint64s. type Uint64x2 struct { uint64x2 v128 vals [2]uint64 } -// Len returns the number of elements in a Uint64x2 +// Len returns the number of elements in a Uint64x2. func (x Uint64x2) Len() int { return 2 } -// LoadUint64x2 loads a Uint64x2 from an array +// LoadUint64x2 loads a Uint64x2 from an array. // //go:noescape func LoadUint64x2(y *[2]uint64) Uint64x2 -// Store stores a Uint64x2 to an array +// Store stores a Uint64x2 to an array. // //go:noescape func (x Uint64x2) Store(y *[2]uint64) // LoadMaskedUint64x2 loads a Uint64x2 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -288,14 +288,14 @@ func (x Uint64x2) Store(y *[2]uint64) func LoadMaskedUint64x2(y *[2]uint64, mask Mask64x2) Uint64x2 // StoreMasked stores a Uint64x2 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Uint64x2) StoreMasked(y *[2]uint64, mask Mask64x2) -// Mask8x16 is a 128-bit SIMD vector of 16 int8 +// Mask8x16 is a mask for a SIMD vector of 16 8-bit elements. 
type Mask8x16 struct { int8x16 v128 vals [16]int8 @@ -308,10 +308,10 @@ func Mask8x16FromBits(y uint16) Mask8x16 // ToBits constructs a bitmap from a Mask8x16, where 1 means set for the indexed element, 0 means unset. // -// Asm: KMOVB, CPU Features: AVX512 +// Asm: VPMOVMSKB, CPU Features: AVX func (x Mask8x16) ToBits() uint16 -// Mask16x8 is a 128-bit SIMD vector of 8 int16 +// Mask16x8 is a mask for a SIMD vector of 8 16-bit elements. type Mask16x8 struct { int16x8 v128 vals [8]int16 @@ -327,7 +327,7 @@ func Mask16x8FromBits(y uint8) Mask16x8 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x8) ToBits() uint8 -// Mask32x4 is a 128-bit SIMD vector of 4 int32 +// Mask32x4 is a mask for a SIMD vector of 4 32-bit elements. type Mask32x4 struct { int32x4 v128 vals [4]int32 @@ -342,10 +342,10 @@ func Mask32x4FromBits(y uint8) Mask32x4 // ToBits constructs a bitmap from a Mask32x4, where 1 means set for the indexed element, 0 means unset. // Only the lower 4 bits of y are used. // -// Asm: KMOVD, CPU Features: AVX512 +// Asm: VMOVMSKPS, CPU Features: AVX func (x Mask32x4) ToBits() uint8 -// Mask64x2 is a 128-bit SIMD vector of 2 int64 +// Mask64x2 is a mask for a SIMD vector of 2 64-bit elements. type Mask64x2 struct { int64x2 v128 vals [2]int64 @@ -360,7 +360,7 @@ func Mask64x2FromBits(y uint8) Mask64x2 // ToBits constructs a bitmap from a Mask64x2, where 1 means set for the indexed element, 0 means unset. // Only the lower 2 bits of y are used. // -// Asm: KMOVQ, CPU Features: AVX512 +// Asm: VMOVMSKPD, CPU Features: AVX func (x Mask64x2) ToBits() uint8 // v256 is a tag type that tells the compiler that this is really 256-bit SIMD @@ -368,27 +368,27 @@ type v256 struct { _256 [0]func() // uncomparable } -// Float32x8 is a 256-bit SIMD vector of 8 float32 +// Float32x8 is a 256-bit SIMD vector of 8 float32s. 
type Float32x8 struct { float32x8 v256 vals [8]float32 } -// Len returns the number of elements in a Float32x8 +// Len returns the number of elements in a Float32x8. func (x Float32x8) Len() int { return 8 } -// LoadFloat32x8 loads a Float32x8 from an array +// LoadFloat32x8 loads a Float32x8 from an array. // //go:noescape func LoadFloat32x8(y *[8]float32) Float32x8 -// Store stores a Float32x8 to an array +// Store stores a Float32x8 to an array. // //go:noescape func (x Float32x8) Store(y *[8]float32) // LoadMaskedFloat32x8 loads a Float32x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -396,34 +396,34 @@ func (x Float32x8) Store(y *[8]float32) func LoadMaskedFloat32x8(y *[8]float32, mask Mask32x8) Float32x8 // StoreMasked stores a Float32x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Float32x8) StoreMasked(y *[8]float32, mask Mask32x8) -// Float64x4 is a 256-bit SIMD vector of 4 float64 +// Float64x4 is a 256-bit SIMD vector of 4 float64s. type Float64x4 struct { float64x4 v256 vals [4]float64 } -// Len returns the number of elements in a Float64x4 +// Len returns the number of elements in a Float64x4. func (x Float64x4) Len() int { return 4 } -// LoadFloat64x4 loads a Float64x4 from an array +// LoadFloat64x4 loads a Float64x4 from an array. // //go:noescape func LoadFloat64x4(y *[4]float64) Float64x4 -// Store stores a Float64x4 to an array +// Store stores a Float64x4 to an array. // //go:noescape func (x Float64x4) Store(y *[4]float64) // LoadMaskedFloat64x4 loads a Float64x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -431,180 +431,180 @@ func (x Float64x4) Store(y *[4]float64) func LoadMaskedFloat64x4(y *[4]float64, mask Mask64x4) Float64x4 // StoreMasked stores a Float64x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Float64x4) StoreMasked(y *[4]float64, mask Mask64x4) -// Int8x32 is a 256-bit SIMD vector of 32 int8 +// Int8x32 is a 256-bit SIMD vector of 32 int8s. type Int8x32 struct { int8x32 v256 vals [32]int8 } -// Len returns the number of elements in a Int8x32 +// Len returns the number of elements in an Int8x32. func (x Int8x32) Len() int { return 32 } -// LoadInt8x32 loads a Int8x32 from an array +// LoadInt8x32 loads an Int8x32 from an array. // //go:noescape func LoadInt8x32(y *[32]int8) Int8x32 -// Store stores a Int8x32 to an array +// Store stores an Int8x32 to an array. // //go:noescape func (x Int8x32) Store(y *[32]int8) -// Int16x16 is a 256-bit SIMD vector of 16 int16 +// Int16x16 is a 256-bit SIMD vector of 16 int16s. type Int16x16 struct { int16x16 v256 vals [16]int16 } -// Len returns the number of elements in a Int16x16 +// Len returns the number of elements in an Int16x16. func (x Int16x16) Len() int { return 16 } -// LoadInt16x16 loads a Int16x16 from an array +// LoadInt16x16 loads an Int16x16 from an array. // //go:noescape func LoadInt16x16(y *[16]int16) Int16x16 -// Store stores a Int16x16 to an array +// Store stores an Int16x16 to an array. // //go:noescape func (x Int16x16) Store(y *[16]int16) -// Int32x8 is a 256-bit SIMD vector of 8 int32 +// Int32x8 is a 256-bit SIMD vector of 8 int32s. type Int32x8 struct { int32x8 v256 vals [8]int32 } -// Len returns the number of elements in a Int32x8 +// Len returns the number of elements in an Int32x8. func (x Int32x8) Len() int { return 8 } -// LoadInt32x8 loads a Int32x8 from an array +// LoadInt32x8 loads an Int32x8 from an array. 
// //go:noescape func LoadInt32x8(y *[8]int32) Int32x8 -// Store stores a Int32x8 to an array +// Store stores an Int32x8 to an array. // //go:noescape func (x Int32x8) Store(y *[8]int32) -// LoadMaskedInt32x8 loads a Int32x8 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x8 loads an Int32x8 from an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt32x8(y *[8]int32, mask Mask32x8) Int32x8 -// StoreMasked stores a Int32x8 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x8 to an array, +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Int32x8) StoreMasked(y *[8]int32, mask Mask32x8) -// Int64x4 is a 256-bit SIMD vector of 4 int64 +// Int64x4 is a 256-bit SIMD vector of 4 int64s. type Int64x4 struct { int64x4 v256 vals [4]int64 } -// Len returns the number of elements in a Int64x4 +// Len returns the number of elements in an Int64x4. func (x Int64x4) Len() int { return 4 } -// LoadInt64x4 loads a Int64x4 from an array +// LoadInt64x4 loads an Int64x4 from an array. // //go:noescape func LoadInt64x4(y *[4]int64) Int64x4 -// Store stores a Int64x4 to an array +// Store stores an Int64x4 to an array. // //go:noescape func (x Int64x4) Store(y *[4]int64) -// LoadMaskedInt64x4 loads a Int64x4 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x4 loads an Int64x4 from an array, +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func LoadMaskedInt64x4(y *[4]int64, mask Mask64x4) Int64x4 -// StoreMasked stores a Int64x4 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x4 to an array, +// at those elements enabled by mask. 
// // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Int64x4) StoreMasked(y *[4]int64, mask Mask64x4) -// Uint8x32 is a 256-bit SIMD vector of 32 uint8 +// Uint8x32 is a 256-bit SIMD vector of 32 uint8s. type Uint8x32 struct { uint8x32 v256 vals [32]uint8 } -// Len returns the number of elements in a Uint8x32 +// Len returns the number of elements in a Uint8x32. func (x Uint8x32) Len() int { return 32 } -// LoadUint8x32 loads a Uint8x32 from an array +// LoadUint8x32 loads a Uint8x32 from an array. // //go:noescape func LoadUint8x32(y *[32]uint8) Uint8x32 -// Store stores a Uint8x32 to an array +// Store stores a Uint8x32 to an array. // //go:noescape func (x Uint8x32) Store(y *[32]uint8) -// Uint16x16 is a 256-bit SIMD vector of 16 uint16 +// Uint16x16 is a 256-bit SIMD vector of 16 uint16s. type Uint16x16 struct { uint16x16 v256 vals [16]uint16 } -// Len returns the number of elements in a Uint16x16 +// Len returns the number of elements in a Uint16x16. func (x Uint16x16) Len() int { return 16 } -// LoadUint16x16 loads a Uint16x16 from an array +// LoadUint16x16 loads a Uint16x16 from an array. // //go:noescape func LoadUint16x16(y *[16]uint16) Uint16x16 -// Store stores a Uint16x16 to an array +// Store stores a Uint16x16 to an array. // //go:noescape func (x Uint16x16) Store(y *[16]uint16) -// Uint32x8 is a 256-bit SIMD vector of 8 uint32 +// Uint32x8 is a 256-bit SIMD vector of 8 uint32s. type Uint32x8 struct { uint32x8 v256 vals [8]uint32 } -// Len returns the number of elements in a Uint32x8 +// Len returns the number of elements in a Uint32x8. func (x Uint32x8) Len() int { return 8 } -// LoadUint32x8 loads a Uint32x8 from an array +// LoadUint32x8 loads a Uint32x8 from an array. // //go:noescape func LoadUint32x8(y *[8]uint32) Uint32x8 -// Store stores a Uint32x8 to an array +// Store stores a Uint32x8 to an array. 
// //go:noescape func (x Uint32x8) Store(y *[8]uint32) // LoadMaskedUint32x8 loads a Uint32x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // @@ -612,34 +612,34 @@ func (x Uint32x8) Store(y *[8]uint32) func LoadMaskedUint32x8(y *[8]uint32, mask Mask32x8) Uint32x8 // StoreMasked stores a Uint32x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVD, CPU Feature: AVX2 // //go:noescape func (x Uint32x8) StoreMasked(y *[8]uint32, mask Mask32x8) -// Uint64x4 is a 256-bit SIMD vector of 4 uint64 +// Uint64x4 is a 256-bit SIMD vector of 4 uint64s. type Uint64x4 struct { uint64x4 v256 vals [4]uint64 } -// Len returns the number of elements in a Uint64x4 +// Len returns the number of elements in a Uint64x4. func (x Uint64x4) Len() int { return 4 } -// LoadUint64x4 loads a Uint64x4 from an array +// LoadUint64x4 loads a Uint64x4 from an array. // //go:noescape func LoadUint64x4(y *[4]uint64) Uint64x4 -// Store stores a Uint64x4 to an array +// Store stores a Uint64x4 to an array. // //go:noescape func (x Uint64x4) Store(y *[4]uint64) // LoadMaskedUint64x4 loads a Uint64x4 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // @@ -647,14 +647,14 @@ func (x Uint64x4) Store(y *[4]uint64) func LoadMaskedUint64x4(y *[4]uint64, mask Mask64x4) Uint64x4 // StoreMasked stores a Uint64x4 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMASKMOVQ, CPU Feature: AVX2 // //go:noescape func (x Uint64x4) StoreMasked(y *[4]uint64, mask Mask64x4) -// Mask8x32 is a 256-bit SIMD vector of 32 int8 +// Mask8x32 is a mask for a SIMD vector of 32 8-bit elements. 
type Mask8x32 struct { int8x32 v256 vals [32]int8 @@ -667,10 +667,10 @@ func Mask8x32FromBits(y uint32) Mask8x32 // ToBits constructs a bitmap from a Mask8x32, where 1 means set for the indexed element, 0 means unset. // -// Asm: KMOVB, CPU Features: AVX512 +// Asm: VPMOVMSKB, CPU Features: AVX2 func (x Mask8x32) ToBits() uint32 -// Mask16x16 is a 256-bit SIMD vector of 16 int16 +// Mask16x16 is a mask for a SIMD vector of 16 16-bit elements. type Mask16x16 struct { int16x16 v256 vals [16]int16 @@ -686,7 +686,7 @@ func Mask16x16FromBits(y uint16) Mask16x16 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x16) ToBits() uint16 -// Mask32x8 is a 256-bit SIMD vector of 8 int32 +// Mask32x8 is a mask for a SIMD vector of 8 32-bit elements. type Mask32x8 struct { int32x8 v256 vals [8]int32 @@ -699,10 +699,10 @@ func Mask32x8FromBits(y uint8) Mask32x8 // ToBits constructs a bitmap from a Mask32x8, where 1 means set for the indexed element, 0 means unset. // -// Asm: KMOVD, CPU Features: AVX512 +// Asm: VMOVMSKPS, CPU Features: AVX func (x Mask32x8) ToBits() uint8 -// Mask64x4 is a 256-bit SIMD vector of 4 int64 +// Mask64x4 is a mask for a SIMD vector of 4 64-bit elements. type Mask64x4 struct { int64x4 v256 vals [4]int64 @@ -717,7 +717,7 @@ func Mask64x4FromBits(y uint8) Mask64x4 // ToBits constructs a bitmap from a Mask64x4, where 1 means set for the indexed element, 0 means unset. // Only the lower 4 bits of y are used. // -// Asm: KMOVQ, CPU Features: AVX512 +// Asm: VMOVMSKPD, CPU Features: AVX func (x Mask64x4) ToBits() uint8 // v512 is a tag type that tells the compiler that this is really 512-bit SIMD @@ -725,27 +725,27 @@ type v512 struct { _512 [0]func() // uncomparable } -// Float32x16 is a 512-bit SIMD vector of 16 float32 +// Float32x16 is a 512-bit SIMD vector of 16 float32s. type Float32x16 struct { float32x16 v512 vals [16]float32 } -// Len returns the number of elements in a Float32x16 +// Len returns the number of elements in a Float32x16. 
func (x Float32x16) Len() int { return 16 } -// LoadFloat32x16 loads a Float32x16 from an array +// LoadFloat32x16 loads a Float32x16 from an array. // //go:noescape func LoadFloat32x16(y *[16]float32) Float32x16 -// Store stores a Float32x16 to an array +// Store stores a Float32x16 to an array. // //go:noescape func (x Float32x16) Store(y *[16]float32) // LoadMaskedFloat32x16 loads a Float32x16 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // @@ -753,34 +753,34 @@ func (x Float32x16) Store(y *[16]float32) func LoadMaskedFloat32x16(y *[16]float32, mask Mask32x16) Float32x16 // StoreMasked stores a Float32x16 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Float32x16) StoreMasked(y *[16]float32, mask Mask32x16) -// Float64x8 is a 512-bit SIMD vector of 8 float64 +// Float64x8 is a 512-bit SIMD vector of 8 float64s. type Float64x8 struct { float64x8 v512 vals [8]float64 } -// Len returns the number of elements in a Float64x8 +// Len returns the number of elements in a Float64x8. func (x Float64x8) Len() int { return 8 } -// LoadFloat64x8 loads a Float64x8 from an array +// LoadFloat64x8 loads a Float64x8 from an array. // //go:noescape func LoadFloat64x8(y *[8]float64) Float64x8 -// Store stores a Float64x8 to an array +// Store stores a Float64x8 to an array. // //go:noescape func (x Float64x8) Store(y *[8]float64) // LoadMaskedFloat64x8 loads a Float64x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // @@ -788,174 +788,174 @@ func (x Float64x8) Store(y *[8]float64) func LoadMaskedFloat64x8(y *[8]float64, mask Mask64x8) Float64x8 // StoreMasked stores a Float64x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Float64x8) StoreMasked(y *[8]float64, mask Mask64x8) -// Int8x64 is a 512-bit SIMD vector of 64 int8 +// Int8x64 is a 512-bit SIMD vector of 64 int8s. type Int8x64 struct { int8x64 v512 vals [64]int8 } -// Len returns the number of elements in a Int8x64 +// Len returns the number of elements in an Int8x64. func (x Int8x64) Len() int { return 64 } -// LoadInt8x64 loads a Int8x64 from an array +// LoadInt8x64 loads an Int8x64 from an array. // //go:noescape func LoadInt8x64(y *[64]int8) Int8x64 -// Store stores a Int8x64 to an array +// Store stores an Int8x64 to an array. // //go:noescape func (x Int8x64) Store(y *[64]int8) -// LoadMaskedInt8x64 loads a Int8x64 from an array, -// at those elements enabled by mask +// LoadMaskedInt8x64 loads an Int8x64 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU8.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt8x64(y *[64]int8, mask Mask8x64) Int8x64 -// StoreMasked stores a Int8x64 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int8x64 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU8, CPU Feature: AVX512 // //go:noescape func (x Int8x64) StoreMasked(y *[64]int8, mask Mask8x64) -// Int16x32 is a 512-bit SIMD vector of 32 int16 +// Int16x32 is a 512-bit SIMD vector of 32 int16s. type Int16x32 struct { int16x32 v512 vals [32]int16 } -// Len returns the number of elements in a Int16x32 +// Len returns the number of elements in an Int16x32. func (x Int16x32) Len() int { return 32 } -// LoadInt16x32 loads a Int16x32 from an array +// LoadInt16x32 loads an Int16x32 from an array. // //go:noescape func LoadInt16x32(y *[32]int16) Int16x32 -// Store stores a Int16x32 to an array +// Store stores an Int16x32 to an array. 
// //go:noescape func (x Int16x32) Store(y *[32]int16) -// LoadMaskedInt16x32 loads a Int16x32 from an array, -// at those elements enabled by mask +// LoadMaskedInt16x32 loads an Int16x32 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU16.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt16x32(y *[32]int16, mask Mask16x32) Int16x32 -// StoreMasked stores a Int16x32 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int16x32 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU16, CPU Feature: AVX512 // //go:noescape func (x Int16x32) StoreMasked(y *[32]int16, mask Mask16x32) -// Int32x16 is a 512-bit SIMD vector of 16 int32 +// Int32x16 is a 512-bit SIMD vector of 16 int32s. type Int32x16 struct { int32x16 v512 vals [16]int32 } -// Len returns the number of elements in a Int32x16 +// Len returns the number of elements in an Int32x16. func (x Int32x16) Len() int { return 16 } -// LoadInt32x16 loads a Int32x16 from an array +// LoadInt32x16 loads an Int32x16 from an array. // //go:noescape func LoadInt32x16(y *[16]int32) Int32x16 -// Store stores a Int32x16 to an array +// Store stores an Int32x16 to an array. // //go:noescape func (x Int32x16) Store(y *[16]int32) -// LoadMaskedInt32x16 loads a Int32x16 from an array, -// at those elements enabled by mask +// LoadMaskedInt32x16 loads an Int32x16 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt32x16(y *[16]int32, mask Mask32x16) Int32x16 -// StoreMasked stores a Int32x16 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int32x16 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Int32x16) StoreMasked(y *[16]int32, mask Mask32x16) -// Int64x8 is a 512-bit SIMD vector of 8 int64 +// Int64x8 is a 512-bit SIMD vector of 8 int64s. 
type Int64x8 struct { int64x8 v512 vals [8]int64 } -// Len returns the number of elements in a Int64x8 +// Len returns the number of elements in an Int64x8. func (x Int64x8) Len() int { return 8 } -// LoadInt64x8 loads a Int64x8 from an array +// LoadInt64x8 loads an Int64x8 from an array. // //go:noescape func LoadInt64x8(y *[8]int64) Int64x8 -// Store stores a Int64x8 to an array +// Store stores an Int64x8 to an array. // //go:noescape func (x Int64x8) Store(y *[8]int64) -// LoadMaskedInt64x8 loads a Int64x8 from an array, -// at those elements enabled by mask +// LoadMaskedInt64x8 loads an Int64x8 from an array, +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // //go:noescape func LoadMaskedInt64x8(y *[8]int64, mask Mask64x8) Int64x8 -// StoreMasked stores a Int64x8 to an array, -// at those elements enabled by mask +// StoreMasked stores an Int64x8 to an array, +// at those elements enabled by mask. // // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Int64x8) StoreMasked(y *[8]int64, mask Mask64x8) -// Uint8x64 is a 512-bit SIMD vector of 64 uint8 +// Uint8x64 is a 512-bit SIMD vector of 64 uint8s. type Uint8x64 struct { uint8x64 v512 vals [64]uint8 } -// Len returns the number of elements in a Uint8x64 +// Len returns the number of elements in a Uint8x64. func (x Uint8x64) Len() int { return 64 } -// LoadUint8x64 loads a Uint8x64 from an array +// LoadUint8x64 loads a Uint8x64 from an array. // //go:noescape func LoadUint8x64(y *[64]uint8) Uint8x64 -// Store stores a Uint8x64 to an array +// Store stores a Uint8x64 to an array. // //go:noescape func (x Uint8x64) Store(y *[64]uint8) // LoadMaskedUint8x64 loads a Uint8x64 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMOVDQU8.Z, CPU Feature: AVX512 // @@ -963,34 +963,34 @@ func (x Uint8x64) Store(y *[64]uint8) func LoadMaskedUint8x64(y *[64]uint8, mask Mask8x64) Uint8x64 // StoreMasked stores a Uint8x64 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU8, CPU Feature: AVX512 // //go:noescape func (x Uint8x64) StoreMasked(y *[64]uint8, mask Mask8x64) -// Uint16x32 is a 512-bit SIMD vector of 32 uint16 +// Uint16x32 is a 512-bit SIMD vector of 32 uint16s. type Uint16x32 struct { uint16x32 v512 vals [32]uint16 } -// Len returns the number of elements in a Uint16x32 +// Len returns the number of elements in a Uint16x32. func (x Uint16x32) Len() int { return 32 } -// LoadUint16x32 loads a Uint16x32 from an array +// LoadUint16x32 loads a Uint16x32 from an array. // //go:noescape func LoadUint16x32(y *[32]uint16) Uint16x32 -// Store stores a Uint16x32 to an array +// Store stores a Uint16x32 to an array. // //go:noescape func (x Uint16x32) Store(y *[32]uint16) // LoadMaskedUint16x32 loads a Uint16x32 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU16.Z, CPU Feature: AVX512 // @@ -998,34 +998,34 @@ func (x Uint16x32) Store(y *[32]uint16) func LoadMaskedUint16x32(y *[32]uint16, mask Mask16x32) Uint16x32 // StoreMasked stores a Uint16x32 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU16, CPU Feature: AVX512 // //go:noescape func (x Uint16x32) StoreMasked(y *[32]uint16, mask Mask16x32) -// Uint32x16 is a 512-bit SIMD vector of 16 uint32 +// Uint32x16 is a 512-bit SIMD vector of 16 uint32s. type Uint32x16 struct { uint32x16 v512 vals [16]uint32 } -// Len returns the number of elements in a Uint32x16 +// Len returns the number of elements in a Uint32x16. func (x Uint32x16) Len() int { return 16 } -// LoadUint32x16 loads a Uint32x16 from an array +// LoadUint32x16 loads a Uint32x16 from an array. 
// //go:noescape func LoadUint32x16(y *[16]uint32) Uint32x16 -// Store stores a Uint32x16 to an array +// Store stores a Uint32x16 to an array. // //go:noescape func (x Uint32x16) Store(y *[16]uint32) // LoadMaskedUint32x16 loads a Uint32x16 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32.Z, CPU Feature: AVX512 // @@ -1033,34 +1033,34 @@ func (x Uint32x16) Store(y *[16]uint32) func LoadMaskedUint32x16(y *[16]uint32, mask Mask32x16) Uint32x16 // StoreMasked stores a Uint32x16 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU32, CPU Feature: AVX512 // //go:noescape func (x Uint32x16) StoreMasked(y *[16]uint32, mask Mask32x16) -// Uint64x8 is a 512-bit SIMD vector of 8 uint64 +// Uint64x8 is a 512-bit SIMD vector of 8 uint64s. type Uint64x8 struct { uint64x8 v512 vals [8]uint64 } -// Len returns the number of elements in a Uint64x8 +// Len returns the number of elements in a Uint64x8. func (x Uint64x8) Len() int { return 8 } -// LoadUint64x8 loads a Uint64x8 from an array +// LoadUint64x8 loads a Uint64x8 from an array. // //go:noescape func LoadUint64x8(y *[8]uint64) Uint64x8 -// Store stores a Uint64x8 to an array +// Store stores a Uint64x8 to an array. // //go:noescape func (x Uint64x8) Store(y *[8]uint64) // LoadMaskedUint64x8 loads a Uint64x8 from an array, -// at those elements enabled by mask +// at those elements enabled by mask. // // Asm: VMOVDQU64.Z, CPU Feature: AVX512 // @@ -1068,14 +1068,14 @@ func (x Uint64x8) Store(y *[8]uint64) func LoadMaskedUint64x8(y *[8]uint64, mask Mask64x8) Uint64x8 // StoreMasked stores a Uint64x8 to an array, -// at those elements enabled by mask +// at those elements enabled by mask. 
// // Asm: VMOVDQU64, CPU Feature: AVX512 // //go:noescape func (x Uint64x8) StoreMasked(y *[8]uint64, mask Mask64x8) -// Mask8x64 is a 512-bit SIMD vector of 64 int8 +// Mask8x64 is a mask for a SIMD vector of 64 8-bit elements. type Mask8x64 struct { int8x64 v512 vals [64]int8 @@ -1091,7 +1091,7 @@ func Mask8x64FromBits(y uint64) Mask8x64 // Asm: KMOVB, CPU Features: AVX512 func (x Mask8x64) ToBits() uint64 -// Mask16x32 is a 512-bit SIMD vector of 32 int16 +// Mask16x32 is a mask for a SIMD vector of 32 16-bit elements. type Mask16x32 struct { int16x32 v512 vals [32]int16 @@ -1107,7 +1107,7 @@ func Mask16x32FromBits(y uint32) Mask16x32 // Asm: KMOVW, CPU Features: AVX512 func (x Mask16x32) ToBits() uint32 -// Mask32x16 is a 512-bit SIMD vector of 16 int32 +// Mask32x16 is a mask for a SIMD vector of 16 32-bit elements. type Mask32x16 struct { int32x16 v512 vals [16]int32 @@ -1123,7 +1123,7 @@ func Mask32x16FromBits(y uint16) Mask32x16 // Asm: KMOVD, CPU Features: AVX512 func (x Mask32x16) ToBits() uint16 -// Mask64x8 is a 512-bit SIMD vector of 8 int64 +// Mask64x8 is a mask for a SIMD vector of 8 64-bit elements. type Mask64x8 struct { int64x8 v512 vals [8]int64 diff --git a/src/simd/archsimd/unsafe_helpers.go b/src/simd/archsimd/unsafe_helpers.go index 0123ad77c5..7b98053e70 100644 --- a/src/simd/archsimd/unsafe_helpers.go +++ b/src/simd/archsimd/unsafe_helpers.go @@ -1,4 +1,4 @@ -// Code generated by 'go run genfiles.go'; DO NOT EDIT. +// Code generated by 'tmplgen'; DO NOT EDIT. 
//go:build goexperiment.simd diff --git a/src/time/export_test.go b/src/time/export_test.go index a4940d12f9..78ce2ad00d 100644 --- a/src/time/export_test.go +++ b/src/time/export_test.go @@ -40,6 +40,7 @@ var ( Tzset = tzset TzsetName = tzsetName TzsetOffset = tzsetOffset + AsynctimerChan = asynctimerchan ) func LoadFromEmbeddedTZData(zone string) (string, error) { diff --git a/src/time/tick_test.go b/src/time/tick_test.go index dcbbcdb145..9b39d28143 100644 --- a/src/time/tick_test.go +++ b/src/time/tick_test.go @@ -266,6 +266,10 @@ func BenchmarkTickerResetNaive(b *testing.B) { } func TestTimerGC(t *testing.T) { + if AsynctimerChan.Value() == "1" { + t.Skip("skipping TestTimerGC with asynctimerchan=1") + } + run := func(t *testing.T, what string, f func()) { t.Helper() t.Run(what, func(t *testing.T) { diff --git a/test/cmplxdivide.go b/test/cmplxdivide.go index 49cd5bf582..4b8d549fc4 100644 --- a/test/cmplxdivide.go +++ b/test/cmplxdivide.go @@ -35,7 +35,7 @@ func main() { fmt.Printf("BUG\n") bad = true } - fmt.Printf("%v/%v: expected %v error; got %v\n", t.f, t.g, t.out, x) + fmt.Printf("%v/%v: got %v, want %v\n", t.f, t.g, x, t.out) } } if bad { diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 39969dcdb2..d9c567b078 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -8,274 +8,387 @@ package codegen import "math/bits" -/************************************ - * 64-bit instructions - ************************************/ +// +// 64 bit instructions +// -func bitcheck64_constleft(a uint64) (n int) { - // amd64:"BTQ [$]63" +func bitsCheckConstLeftShiftU64(a uint64) (n int) { + // amd64:"BTQ [$]63," + // arm64:"TBNZ [$]63," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&(1<<63) != 0 { return 1 } - // amd64:"BTQ [$]60" + // amd64:"BTQ [$]60," + // arm64:"TBNZ [$]60," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&(1<<60) != 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ [$]0," + // riscv64:"ANDI" "BEQZ" if a&(1<<0) != 0 { 
return 1 } return 0 } -func bitcheck64_constright(a [8]uint64) (n int) { - // amd64:"BTQ [$]63" +func bitsCheckConstRightShiftU64(a [8]uint64) (n int) { + // amd64:"BTQ [$]63," + // arm64:"LSR [$]63," "TBNZ [$]0," + // riscv64:"SRLI" "ANDI" "BNEZ" if (a[0]>>63)&1 != 0 { return 1 } - // amd64:"BTQ [$]63" + // amd64:"BTQ [$]63," + // arm64:"LSR [$]63," "CBNZ" + // riscv64:"SRLI" "BNEZ" if a[1]>>63 != 0 { return 1 } - // amd64:"BTQ [$]63" + // amd64:"BTQ [$]63," + // arm64:"LSR [$]63," "CBZ" + // riscv64:"SRLI" "BEQZ" if a[2]>>63 == 0 { return 1 } - // amd64:"BTQ [$]60" + // amd64:"BTQ [$]60," + // arm64:"LSR [$]60," "TBZ [$]0," + // riscv64:"SRLI", "ANDI" "BEQZ" if (a[3]>>60)&1 == 0 { return 1 } - // amd64:"BTL [$]1" + // amd64:"BTL [$]1," + // arm64:"LSR [$]1," "TBZ [$]0," + // riscv64:"SRLI" "ANDI" "BEQZ" if (a[4]>>1)&1 == 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ [$]0," -"LSR" + // riscv64:"ANDI" "BEQZ" -"SRLI" if (a[5]>>0)&1 == 0 { return 1 } - // amd64:"BTL [$]7" + // amd64:"BTL [$]7," + // arm64:"LSR [$]5," "TBNZ [$]2," + // riscv64:"SRLI" "ANDI" "BNEZ" if (a[6]>>5)&4 == 0 { return 1 } return 0 } -func bitcheck64_var(a, b uint64) (n int) { +func bitsCheckVarU64(a, b uint64) (n int) { // amd64:"BTQ" + // arm64:"MOVD [$]1," "LSL" "TST" + // riscv64:"ANDI [$]63," "SLL " "AND " if a&(1<<(b&63)) != 0 { return 1 } - // amd64:"BTQ" -"BT. [$]0" + // amd64:"BTQ" -"BT. 
[$]0," + // arm64:"LSR" "TBZ [$]0," + // riscv64:"ANDI [$]63," "SRL" "ANDI [$]1," if (b>>(a&63))&1 != 0 { return 1 } return 0 } -func bitcheck64_mask(a uint64) (n int) { - // amd64:"BTQ [$]63" +func bitsCheckMaskU64(a uint64) (n int) { + // amd64:"BTQ [$]63," + // arm64:"TBNZ [$]63," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&0x8000000000000000 != 0 { return 1 } - // amd64:"BTQ [$]59" + // amd64:"BTQ [$]59," + // arm64:"TBNZ [$]59," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&0x800000000000000 != 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ [$]0," + // riscv64:"ANDI" "BEQZ" if a&0x1 != 0 { return 1 } return 0 } -func biton64(a, b uint64) (n uint64) { +func bitsSetU64(a, b uint64) (n uint64) { // amd64:"BTSQ" + // arm64:"MOVD [$]1," "LSL" "ORR" + // riscv64:"ANDI" "SLL" "OR" n += b | (1 << (a & 63)) - // amd64:"BTSQ [$]63" + // amd64:"BTSQ [$]63," + // arm64:"ORR [$]-9223372036854775808," + // riscv64:"MOV [$]" "OR " n += a | (1 << 63) - // amd64:"BTSQ [$]60" + // amd64:"BTSQ [$]60," + // arm64:"ORR [$]1152921504606846976," + // riscv64:"MOV [$]" "OR " n += a | (1 << 60) - // amd64:"ORQ [$]1" + // amd64:"ORQ [$]1," + // arm64:"ORR [$]1," + // riscv64:"ORI" n += a | (1 << 0) return n } -func bitoff64(a, b uint64) (n uint64) { +func bitsClearU64(a, b uint64) (n uint64) { // amd64:"BTRQ" + // arm64:"MOVD [$]1," "LSL" "BIC" + // riscv64:"ANDI" "SLL" "ANDN" n += b &^ (1 << (a & 63)) - // amd64:"BTRQ [$]63" + // amd64:"BTRQ [$]63," + // arm64:"AND [$]9223372036854775807," + // riscv64:"MOV [$]" "AND " n += a &^ (1 << 63) - // amd64:"BTRQ [$]60" + // amd64:"BTRQ [$]60," + // arm64:"AND [$]-1152921504606846977," + // riscv64:"MOV [$]" "AND " n += a &^ (1 << 60) // amd64:"ANDQ [$]-2" + // arm64:"AND [$]-2" + // riscv64:"ANDI [$]-2" n += a &^ (1 << 0) return n } -func clearLastBit(x int64, y int32) (int64, int32) { - // amd64:"ANDQ [$]-2" +func bitsClearLowest(x int64, y int32) (int64, int32) { + // amd64:"ANDQ [$]-2," + // arm64:"AND [$]-2," + // 
riscv64:"ANDI [$]-2," a := (x >> 1) << 1 - // amd64:"ANDL [$]-2" + // amd64:"ANDL [$]-2," + // arm64:"AND [$]-2," + // riscv64:"ANDI [$]-2," b := (y >> 1) << 1 return a, b } -func bitcompl64(a, b uint64) (n uint64) { +func bitsFlipU64(a, b uint64) (n uint64) { // amd64:"BTCQ" + // arm64:"MOVD [$]1," "LSL" "EOR" + // riscv64:"ANDI" "SLL" "XOR " n += b ^ (1 << (a & 63)) - // amd64:"BTCQ [$]63" + // amd64:"BTCQ [$]63," + // arm64:"EOR [$]-9223372036854775808," + // riscv64:"MOV [$]" "XOR " n += a ^ (1 << 63) - // amd64:"BTCQ [$]60" + // amd64:"BTCQ [$]60," + // arm64:"EOR [$]1152921504606846976," + // riscv64:"MOV [$]" "XOR " n += a ^ (1 << 60) - // amd64:"XORQ [$]1" + // amd64:"XORQ [$]1," + // arm64:"EOR [$]1," + // riscv64:"XORI [$]1," n += a ^ (1 << 0) return n } -/************************************ - * 32-bit instructions - ************************************/ +// +// 32 bit instructions +// -func bitcheck32_constleft(a uint32) (n int) { - // amd64:"BTL [$]31" +func bitsCheckConstShiftLeftU32(a uint32) (n int) { + // amd64:"BTL [$]31," + // arm64:"TBNZ [$]31," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&(1<<31) != 0 { return 1 } - // amd64:"BTL [$]28" + // amd64:"BTL [$]28," + // arm64:"TBNZ [$]28," + // riscv64:"ANDI" "BNEZ" if a&(1<<28) != 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ [$]0," + // riscv64:"ANDI" "BEQZ" if a&(1<<0) != 0 { return 1 } return 0 } -func bitcheck32_constright(a [8]uint32) (n int) { - // amd64:"BTL [$]31" +func bitsCheckConstRightShiftU32(a [8]uint32) (n int) { + // amd64:"BTL [$]31," + // arm64:"UBFX [$]31," "CBNZW" + // riscv64:"SRLI" "ANDI" "BNEZ" if (a[0]>>31)&1 != 0 { return 1 } - // amd64:"BTL [$]31" + // amd64:"BTL [$]31," + // arm64:"UBFX [$]31," "CBNZW" + // riscv64:"SRLI" "BNEZ" if a[1]>>31 != 0 { return 1 } - // amd64:"BTL [$]31" + // amd64:"BTL [$]31," + // arm64:"UBFX [$]31," "CBZW" + // riscv64:"SRLI" "BEQZ" if a[2]>>31 == 0 { return 1 } - // amd64:"BTL [$]28" + // amd64:"BTL [$]28," + // 
arm64:"UBFX [$]28," "TBZ" + // riscv64:"SRLI" "ANDI" "BEQZ" if (a[3]>>28)&1 == 0 { return 1 } - // amd64:"BTL [$]1" + // amd64:"BTL [$]1," + // arm64:"UBFX [$]1," "TBZ" + // riscv64:"SRLI" "ANDI" "BEQZ" if (a[4]>>1)&1 == 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ" -"UBFX" -"SRL" + // riscv64:"ANDI" "BEQZ" -"SRLI " if (a[5]>>0)&1 == 0 { return 1 } - // amd64:"BTL [$]7" + // amd64:"BTL [$]7," + // arm64:"UBFX [$]5," "TBNZ" + // riscv64:"SRLI" "ANDI" "BNEZ" if (a[6]>>5)&4 == 0 { return 1 } return 0 } -func bitcheck32_var(a, b uint32) (n int) { +func bitsCheckVarU32(a, b uint32) (n int) { // amd64:"BTL" + // arm64:"AND [$]31," "MOVD [$]1," "LSL" "TSTW" + // riscv64:"ANDI [$]31," "SLL " "AND " if a&(1<<(b&31)) != 0 { return 1 } // amd64:"BTL" -"BT. [$]0" + // arm64:"AND [$]31," "LSR" "TBZ" + // riscv64:"ANDI [$]31," "SRLW " "ANDI [$]1," if (b>>(a&31))&1 != 0 { return 1 } return 0 } -func bitcheck32_mask(a uint32) (n int) { - // amd64:"BTL [$]31" +func bitsCheckMaskU32(a uint32) (n int) { + // amd64:"BTL [$]31," + // arm64:"TBNZ [$]31," + // riscv64:"MOV [$]" "AND" "BNEZ" if a&0x80000000 != 0 { return 1 } - // amd64:"BTL [$]27" + // amd64:"BTL [$]27," + // arm64:"TBNZ [$]27," + // riscv64:"ANDI" "BNEZ" if a&0x8000000 != 0 { return 1 } - // amd64:"BTL [$]0" + // amd64:"BTL [$]0," + // arm64:"TBZ [$]0," + // riscv64:"ANDI" "BEQZ" if a&0x1 != 0 { return 1 } return 0 } -func biton32(a, b uint32) (n uint32) { +func bitsSetU32(a, b uint32) (n uint32) { // amd64:"BTSL" + // arm64:"AND [$]31," "MOVD [$]1," "LSL" "ORR" + // riscv64:"ANDI" "SLL" "OR" n += b | (1 << (a & 31)) - // amd64:"ORL [$]-2147483648" + // amd64:"ORL [$]-2147483648," + // arm64:"ORR [$]-2147483648," + // riscv64:"ORI [$]-2147483648," n += a | (1 << 31) - // amd64:"ORL [$]268435456" + // amd64:"ORL [$]268435456," + // arm64:"ORR [$]268435456," + // riscv64:"ORI [$]268435456," n += a | (1 << 28) - // amd64:"ORL [$]1" + // amd64:"ORL [$]1," + // arm64:"ORR [$]1," + // 
riscv64:"ORI [$]1," n += a | (1 << 0) return n } -func bitoff32(a, b uint32) (n uint32) { +func bitsClearU32(a, b uint32) (n uint32) { // amd64:"BTRL" + // arm64:"AND [$]31," "MOVD [$]1," "LSL" "BIC" + // riscv64:"ANDI" "SLL" "ANDN" n += b &^ (1 << (a & 31)) - // amd64:"ANDL [$]2147483647" + // amd64:"ANDL [$]2147483647," + // arm64:"AND [$]2147483647," + // riscv64:"ANDI [$]2147483647," n += a &^ (1 << 31) - // amd64:"ANDL [$]-268435457" + // amd64:"ANDL [$]-268435457," + // arm64:"AND [$]-268435457," + // riscv64:"ANDI [$]-268435457," n += a &^ (1 << 28) - // amd64:"ANDL [$]-2" + // amd64:"ANDL [$]-2," + // arm64:"AND [$]-2," + // riscv64:"ANDI [$]-2," n += a &^ (1 << 0) return n } -func bitcompl32(a, b uint32) (n uint32) { +func bitsFlipU32(a, b uint32) (n uint32) { // amd64:"BTCL" + // arm64:"AND [$]31," "MOVD [$]1," "LSL" "EOR" + // riscv64:"ANDI" "SLL" "XOR " n += b ^ (1 << (a & 31)) - // amd64:"XORL [$]-2147483648" + // amd64:"XORL [$]-2147483648," + // arm64:"EOR [$]-2147483648," + // riscv64:"XORI [$]-2147483648," n += a ^ (1 << 31) - // amd64:"XORL [$]268435456" + // amd64:"XORL [$]268435456," + // arm64:"EOR [$]268435456," + // riscv64:"XORI [$]268435456," n += a ^ (1 << 28) - // amd64:"XORL [$]1" + // amd64:"XORL [$]1," + // arm64:"EOR [$]1," + // riscv64:"XORI [$]1," n += a ^ (1 << 0) return n } -// check direct operation on memory with constant and shifted constant sources -func bitOpOnMem(a []uint32, b, c, d uint32) { +func bitsOpOnMem(a []uint32, b, c, d uint32) { + // check direct operation on memory with constant + // amd64:`ANDL\s[$]200,\s\([A-Z][A-Z0-9]+\)` a[0] &= 200 // amd64:`ORL\s[$]220,\s4\([A-Z][A-Z0-9]+\)` @@ -284,24 +397,24 @@ func bitOpOnMem(a []uint32, b, c, d uint32) { a[2] ^= 240 } -func bitcheckMostNegative(b uint8) bool { +func bitsCheckMostNegative(b uint8) bool { // amd64:"TESTB" + // arm64:"TSTW" "CSET" + // riscv64:"ANDI [$]128," "SNEZ" -"ADDI" return b&0x80 == 0x80 } -// Check AND masking on arm64 (Issue #19857) - -func 
and_mask_1(a uint64) uint64 { +func bitsIssue19857a(a uint64) uint64 { // arm64:`AND ` return a & ((1 << 63) - 1) } -func and_mask_2(a uint64) uint64 { +func bitsIssue19857b(a uint64) uint64 { // arm64:`AND ` return a & (1 << 63) } -func and_mask_3(a, b uint32) (uint32, uint32) { +func bitsIssue19857c(a, b uint32) (uint32, uint32) { // arm/7:`BIC`,-`AND` a &= 0xffffaaaa // arm/7:`BFC`,-`AND`,-`BIC` @@ -309,34 +422,39 @@ func and_mask_3(a, b uint32) (uint32, uint32) { return a, b } -// Check generation of arm64 BIC/EON/ORN instructions - -func op_bic(x, y uint32) uint32 { +func bitsAndNot(x, y uint32) uint32 { // arm64:`BIC `,-`AND` + // loong64:"ANDN " -"AND " + // riscv64:"ANDN" -"AND " return x &^ y } -func op_eon(x, y, z uint32, a []uint32, n, m uint64) uint64 { +func bitsXorNot(x, y, z uint32, a []uint32, n, m uint64) uint64 { // arm64:`EON `,-`EOR`,-`MVN` + // riscv64:"XNOR " -"MOV [$]" -"XOR" a[0] = x ^ (y ^ 0xffffffff) // arm64:`EON `,-`EOR`,-`MVN` + // riscv64:"XNOR" -"XOR" a[1] = ^(y ^ z) // arm64:`EON `,-`XOR` + // riscv64:"XNOR" -"XOR" -"NOT" a[2] = x ^ ^z // arm64:`EON `,-`EOR`,-`MVN` + // riscv64:"XNOR" -"MOV [$]" -"XOR" return n ^ (m ^ 0xffffffffffffffff) } -func op_orn(x, y uint32) uint32 { - // arm64:`ORN `,-`ORR` - // loong64:"ORN" ,-"OR " +func bitsOrNot(x, y uint32) uint32 { + // arm64:"ORN " -"ORR" + // loong64:"ORN" -"OR " + // riscv64:"ORN" -"OR " return x | ^y } -func op_nor(x int64, a []int64) { +func bitsNotOr(x int64, a []int64) { // loong64: "MOVV [$]0" "NOR R" a[0] = ^(0x1234 | x) // loong64:"NOR" -"XOR" @@ -345,64 +463,60 @@ func op_nor(x int64, a []int64) { a[2] = ^(0x12 | 0x34) } -func op_andn(x, y uint32) uint32 { - // loong64:"ANDN " -"AND " - return x &^ y -} - -// check bitsets -func bitSetPowerOf2Test(x int) bool { +func bitsSetPowerOf2Test(x int) bool { // amd64:"BTL [$]3" + // riscv64:"ANDI [$]8," "SNEZ" -"ADDI" return x&8 == 8 } -func bitSetTest(x int) bool { +func bitsSetTest(x int) bool { // amd64:"ANDL [$]9, AX" // 
amd64:"CMPQ AX, [$]9" + // riscv64:"ANDI [$]9," "ADDI [$]-9," "SEQZ" return x&9 == 9 } -// mask contiguous one bits -func cont1Mask64U(x uint64) uint64 { +func bitsMaskContiguousOnes64U(x uint64) uint64 { // s390x:"RISBGZ [$]16, [$]47, [$]0," return x & 0x0000ffffffff0000 } -// mask contiguous zero bits -func cont0Mask64U(x uint64) uint64 { +func bitsMaskContiguousZeroes64U(x uint64) uint64 { // s390x:"RISBGZ [$]48, [$]15, [$]0," return x & 0xffff00000000ffff } -func issue44228a(a []int64, i int) bool { +func bitsIssue44228a(a []int64, i int) bool { // amd64: "BTQ", -"SHL" return a[i>>6]&(1<<(i&63)) != 0 } -func issue44228b(a []int32, i int) bool { + +func bitsIssue44228b(a []int32, i int) bool { // amd64: "BTL", -"SHL" return a[i>>5]&(1<<(i&31)) != 0 } -func issue48467(x, y uint64) uint64 { +func bitsIssue48467(x, y uint64) uint64 { // arm64: -"NEG" d, borrow := bits.Sub64(x, y, 0) return x - d&(-borrow) } -func foldConst(x, y uint64) uint64 { +func bitsFoldConst(x, y uint64) uint64 { // arm64: "ADDS [$]7" -"MOVD [$]7" // ppc64x: "ADDC [$]7," d, b := bits.Add64(x, 7, 0) return b & d } -func foldConstOutOfRange(a uint64) uint64 { +func bitsFoldConstOutOfRange(a uint64) uint64 { // arm64: "MOVD [$]19088744" -"ADD [$]19088744" return a + 0x1234568 } -// Verify sign-extended values are not zero-extended under a bit mask (#61297) -func signextendAndMask8to64(a int8) (s, z uint64) { +func bitsSignExtendAndMask8to64U(a int8) (s, z uint64) { + // Verify sign-extended values are not zero-extended under a bit mask (#61297) + // ppc64x: "MOVB", "ANDCC [$]1015," s = uint64(a) & 0x3F7 // ppc64x: -"MOVB", "ANDCC [$]247," @@ -410,8 +524,9 @@ func signextendAndMask8to64(a int8) (s, z uint64) { return } -// Verify zero-extended values are not sign-extended under a bit mask (#61297) -func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) { +func bitsZeroExtendAndMask8toU64(a int8, b int16) (x, y uint64) { + // Verify zero-extended values are not sign-extended under a bit mask 
(#61297) + // ppc64x: -"MOVB ", -"ANDCC", "MOVBZ" x = uint64(a) & 0xFF // ppc64x: -"MOVH ", -"ANDCC", "MOVHZ" @@ -419,8 +534,9 @@ func zeroextendAndMask8to64(a int8, b int16) (x, y uint64) { return } -// Verify rotate and mask instructions, and further simplified instructions for small types -func bitRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) { +func bitsRotateAndMask(io64 [8]uint64, io32 [4]uint32, io16 [4]uint16, io8 [4]uint8) { + // Verify rotate and mask instructions, and further simplified instructions for small types + // ppc64x: "RLDICR [$]0, R[0-9]*, [$]47, R" io64[0] = io64[0] & 0xFFFFFFFFFFFF0000 // ppc64x: "RLDICL [$]0, R[0-9]*, [$]16, R" diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go index bcce21e404..0b550adc05 100644 --- a/test/codegen/comparisons.go +++ b/test/codegen/comparisons.go @@ -660,13 +660,13 @@ func equalVarString8(a string) bool { return a[:8] == b } -func equalVarStringNoSpill(a,b string) bool { +func equalVarStringNoSpill(a, b string) bool { s := string("ZZZZZZZZZ") // arm64:".*memequal" memeq1 := a[:9] == s // arm64:-".*" memeq2 := s == a[:9] - // arm64:-"MOVB\tR0,.*SP",".*memequal" + // arm64:-"MOVB R0,.*SP",".*memequal" memeq3 := s == b[:9] return memeq1 && memeq2 && memeq3 } diff --git a/test/codegen/simd.go b/test/codegen/simd.go index 8f3a1a9f46..04e01944de 100644 --- a/test/codegen/simd.go +++ b/test/codegen/simd.go @@ -6,11 +6,14 @@ // These tests check code generation of simd peephole optimizations. 
-//go:build goexperiment.simd +//go:build goexperiment.simd && amd64 package codegen -import "simd/archsimd" +import ( + "math" + "simd/archsimd" +) func vptest1() bool { v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1}) @@ -77,3 +80,27 @@ func simdMaskedMerge() archsimd.Int16x16 { mask := archsimd.Mask16x16FromBits(5) return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB\s.*$` } + +var nan = math.NaN() +var floats64s = []float64{0, 1, 2, nan, 4, nan, 6, 7, 8, 9, 10, 11, nan, 13, 14, 15} +var sinkInt64s = make([]int64, 100) + +func simdIsNaN() { + x := archsimd.LoadFloat64x4Slice(floats64s) + y := archsimd.LoadFloat64x4Slice(floats64s[4:]) + a := x.IsNaN() + b := y.IsNaN() + // amd64:"VCMPPD [$]3," -"VPOR" + c := a.Or(b) + c.ToInt64x4().StoreSlice(sinkInt64s) +} + +func simdIsNaN512() { + x := archsimd.LoadFloat64x8Slice(floats64s) + y := archsimd.LoadFloat64x8Slice(floats64s[8:]) + a := x.IsNaN() + b := y.IsNaN() + // amd64:"VCMPPD [$]3," -"VPOR" + c := a.Or(b) + c.ToInt64x8().StoreSlice(sinkInt64s) +} diff --git a/test/fixedbugs/issue76950.go b/test/fixedbugs/issue76950.go new file mode 100644 index 0000000000..b5716e0fc6 --- /dev/null +++ b/test/fixedbugs/issue76950.go @@ -0,0 +1,67 @@ +// compile + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +func MatchLog(input string) bool { + pos := 0 + n := len(input) + matchState := -1 + var c byte + + goto State12 + +State8: + goto State65 + +State12: + if pos >= n { + goto End + } + c = input[pos] + switch { + case c >= 0x09 && c <= 0x0A || c >= 0x0C && c <= 0x0D || c == ' ': + case c >= '0' && c <= '9': + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'b' && c <= 'z': + case c == '[': + goto State8 + case c == 'a': + default: + goto End + } + +State64: + matchState = 179 + if pos >= n { + goto End + } + pos = n + goto State64 + +State65: + +State66: + matchState = 181 + if pos >= n { + goto End + } + pos = n + goto State66 + +End: + if matchState != -1 { + switch matchState { + case 178: + case 156: + case 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175: + case 176, 177, 181, 182, 183: + case 179, 184: + case 180: + } + return true + } + return false +} diff --git a/test/map.go b/test/map.go index 2c1cf8a140..b72fe59bd7 100644 --- a/test/map.go +++ b/test/map.go @@ -431,7 +431,7 @@ func testbasic() { { _, b := mpTi[apT[i]] if b { - panic(fmt.Sprintf("tuple nonexistence decl: mpTi[apt[%d]]", i)) + panic(fmt.Sprintf("tuple nonexistence decl: mpTi[apT[%d]]", i)) } _, b = mpTi[apT[i]] if b { diff --git a/test/stringrange.go b/test/stringrange.go index 99e5edb5a4..d98013b876 100644 --- a/test/stringrange.go +++ b/test/stringrange.go @@ -59,7 +59,7 @@ func main() { for _, c := range "a\xed\xa0\x80a" { if c != 'a' && c != utf8.RuneError { - fmt.Printf("surrogate UTF-8 does not error: %U\n", c) + fmt.Printf("surrogate UTF-8 does not produce an error: %U\n", c) ok = false } } diff --git a/test/typeparam/typelist.go b/test/typeparam/typelist.go index cd8ef7d6e7..b3226301fb 100644 --- a/test/typeparam/typelist.go +++ b/test/typeparam/typelist.go @@ -32,7 +32,7 @@ func _[T interface{ ~int }](x T) { var _ T = T(myint(42)) } -// Indexing a generic type which has a an array as core type. 
+// Indexing a generic type which has an array as core type. func _[T interface{ ~[10]int }](x T) { _ = x[9] // ok } diff --git a/test/uintptrescapes.dir/main.go b/test/uintptrescapes.dir/main.go index afda6218ad..0ccb18f9ff 100644 --- a/test/uintptrescapes.dir/main.go +++ b/test/uintptrescapes.dir/main.go @@ -49,7 +49,7 @@ func main() { defer wg.Done() b := F1() if b != 42 { - fmt.Printf("F1: got %d, expected 42\n", b) + fmt.Printf("F1: got %d, want 42\n", b) c <- false } }() @@ -58,7 +58,7 @@ func main() { defer wg.Done() b := F2() if b != 42 { - fmt.Printf("F2: got %d, expected 42\n", b) + fmt.Printf("F2: got %d, want 42\n", b) c <- false } }() @@ -67,7 +67,7 @@ func main() { defer wg.Done() b := M1() if b != 42 { - fmt.Printf("M1: got %d, expected 42\n", b) + fmt.Printf("M1: got %d, want 42\n", b) c <- false } }() @@ -76,7 +76,7 @@ func main() { defer wg.Done() b := M2() if b != 42 { - fmt.Printf("M2: got %d, expected 42\n", b) + fmt.Printf("M2: got %d, want 42\n", b) c <- false } }() |
