diff options
| field | value | date |
|---|---|---|
| author | Junyang Shao <shaojunyang@google.com> | 2025-11-04 20:33:52 +0000 |
| committer | Junyang Shao <shaojunyang@google.com> | 2025-11-10 09:53:20 -0800 |
| commit | 972732b245399097e1e59aa2e35c47ef5efbf394 (patch) | |
| tree | 74f0faf2b2c6d2cc7eeff8f8a012340c1abb2183 /src/simd | |
| parent | bf77323efa55a4fbe86a3e19c84d12533f5f10af (diff) | |
| download | go-972732b245399097e1e59aa2e35c47ef5efbf394.tar.xz | |
[dev.simd] simd, cmd/compile: remove move from API
These should really be machine ops only.
Change-Id: Idcc611719eff068153d88c5162dd2e0883e5e0ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/717821
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/simd/_gen/simdgen/gen_simdTypes.go | 6 |
| -rw-r--r-- | src/simd/_gen/simdgen/ops/Moves/categories.yaml | 5 |
| -rw-r--r-- | src/simd/_gen/simdgen/ops/Moves/go.yaml | 2 |
| -rw-r--r-- | src/simd/ops_amd64.go | 12 |
| -rw-r--r-- | src/simd/ops_internal_amd64.go | 88 |
5 files changed, 16 insertions, 97 deletions
diff --git a/src/simd/_gen/simdgen/gen_simdTypes.go b/src/simd/_gen/simdgen/gen_simdTypes.go index 7765327b32..efa3ffabeb 100644 --- a/src/simd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/_gen/simdgen/gen_simdTypes.go @@ -613,7 +613,11 @@ func writeSIMDStubs(ops []Operation, typeMap simdTypeMap) (f, fI *bytes.Buffer) } } if i == 0 || op.Go != ops[i-1].Go { - fmt.Fprintf(f, "\n/* %s */\n", op.Go) + if unicode.IsUpper([]rune(op.Go)[0]) { + fmt.Fprintf(f, "\n/* %s */\n", op.Go) + } else { + fmt.Fprintf(fI, "\n/* %s */\n", op.Go) + } } if unicode.IsUpper([]rune(op.Go)[0]) { if err := t.ExecuteTemplate(f, s, op); err != nil { diff --git a/src/simd/_gen/simdgen/ops/Moves/categories.yaml b/src/simd/_gen/simdgen/ops/Moves/categories.yaml index 49006f8801..b1283f4b6b 100644 --- a/src/simd/_gen/simdgen/ops/Moves/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Moves/categories.yaml @@ -52,9 +52,8 @@ // the first or the second based on whether the third is false or true - go: move commutative: false - documentation: !string |- - // NAME blends a vector with zero, with the original value where the mask is true - // and zero where the mask is false. 
+ noTypes: "true" + noGenericOps: "true" - go: Expand commutative: false documentation: !string |- diff --git a/src/simd/_gen/simdgen/ops/Moves/go.yaml b/src/simd/_gen/simdgen/ops/Moves/go.yaml index 495b9ed6fa..08e857c8ea 100644 --- a/src/simd/_gen/simdgen/ops/Moves/go.yaml +++ b/src/simd/_gen/simdgen/ops/Moves/go.yaml @@ -291,7 +291,6 @@ in: - &v go: $t - bits: 512 class: vreg base: int|uint inVariant: @@ -307,7 +306,6 @@ in: - &v go: $t - bits: 512 class: vreg base: float inVariant: diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index ace2f7aec8..0f21c8594c 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -7606,18 +7606,6 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Asm: VPXORQ, CPU Feature: AVX512 func (x Uint64x8) Xor(y Uint64x8) Uint64x8 -/* blend */ - -/* blendMasked */ - -/* concatSelectedConstant */ - -/* concatSelectedConstantGrouped */ - -/* moveMasked */ - -/* tern */ - // Float64x2 converts from Float32x4 to Float64x2 func (from Float32x4) AsFloat64x2() (to Float64x2) diff --git a/src/simd/ops_internal_amd64.go b/src/simd/ops_internal_amd64.go index cb18c90e29..8be40995f0 100644 --- a/src/simd/ops_internal_amd64.go +++ b/src/simd/ops_internal_amd64.go @@ -4,6 +4,8 @@ package simd +/* blend */ + // blend blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true // @@ -16,6 +18,8 @@ func (x Int8x16) blend(y Int8x16, mask Int8x16) Int8x16 // Asm: VPBLENDVB, CPU Feature: AVX2 func (x Int8x32) blend(y Int8x32, mask Int8x32) Int8x32 +/* blendMasked */ + // blendMasked blends two vectors based on mask values, choosing either // the first or the second based on whether the third is false or true // @@ -48,6 +52,8 @@ func (x Int32x16) blendMasked(y Int32x16, mask Mask32x16) Int32x16 // Asm: VPBLENDMQ, CPU Feature: AVX512 func (x Int64x8) blendMasked(y Int64x8, mask Mask64x8) Int64x8 +/* concatSelectedConstant */ + // concatSelectedConstant concatenates 
selected elements from x and y into the lower and upper // halves of the output. The selection is chosen by the constant parameter h1h0l1l0 // where each {h,l}{1,0} is two bits specify which element from y or x to select. @@ -117,6 +123,8 @@ func (x Uint32x4) concatSelectedConstant(h1h0l1l0 uint8, y Uint32x4) Uint32x4 // Asm: VSHUFPD, CPU Feature: AVX func (x Uint64x2) concatSelectedConstant(hilo uint8, y Uint64x2) Uint64x2 +/* concatSelectedConstantGrouped */ + // concatSelectedConstantGrouped concatenates selected elements from 128-bit subvectors of x and y // into the lower and upper halves of corresponding subvectors of the output. // The selection is chosen by the constant parameter h1h0l1l0 @@ -330,85 +338,7 @@ func (x Uint64x4) concatSelectedConstantGrouped(hilos uint8, y Uint64x4) Uint64x // Asm: VSHUFPD, CPU Feature: AVX512 func (x Uint64x8) concatSelectedConstantGrouped(hilos uint8, y Uint64x8) Uint64x8 -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVUPS, CPU Feature: AVX512 -func (x Float32x16) moveMasked(mask Mask32x16) Float32x16 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVUPD, CPU Feature: AVX512 -func (x Float64x8) moveMasked(mask Mask64x8) Float64x8 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU8, CPU Feature: AVX512 -func (x Int8x64) moveMasked(mask Mask8x64) Int8x64 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. 
-// -// Asm: VMOVDQU16, CPU Feature: AVX512 -func (x Int16x32) moveMasked(mask Mask16x32) Int16x32 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU32, CPU Feature: AVX512 -func (x Int32x16) moveMasked(mask Mask32x16) Int32x16 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU64, CPU Feature: AVX512 -func (x Int64x8) moveMasked(mask Mask64x8) Int64x8 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU8, CPU Feature: AVX512 -func (x Uint8x64) moveMasked(mask Mask8x64) Uint8x64 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU16, CPU Feature: AVX512 -func (x Uint16x32) moveMasked(mask Mask16x32) Uint16x32 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU32, CPU Feature: AVX512 -func (x Uint32x16) moveMasked(mask Mask32x16) Uint32x16 - -// moveMasked blends a vector with zero, with the original value where the mask is true -// and zero where the mask is false. -// -// This operation is applied selectively under a write mask. -// -// Asm: VMOVDQU64, CPU Feature: AVX512 -func (x Uint64x8) moveMasked(mask Mask64x8) Uint64x8 +/* tern */ // tern performs a logical operation on three vectors based on the 8-bit truth table. 
// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z)) |
