diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-12-19 15:21:15 -0500 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2025-12-19 14:39:26 -0800 |
| commit | 42cda7c1dfcc1ab109766f2016efe2331b3d0aab (patch) | |
| tree | 43ffea03bdb49895af96c105044b5ffc61b56de1 /src/simd | |
| parent | baa0ae3aaacfcef6ae04beba78a2d2b06776e423 (diff) | |
| download | go-42cda7c1dfcc1ab109766f2016efe2331b3d0aab.tar.xz | |
simd/archsimd: add Grouped for 256- and 512-bit SaturateTo(U)Int16Concat, and fix type
They operate on 128-bit groups, so name them Grouped to be clear,
and consistent with other grouped operations. Reword the
documentation to mention the grouping only for the grouped versions.
Also, SaturateToUint16Concat(Grouped) is a signed int32 to unsigned
uint16 saturated conversion. The receiver and the parameter should
be signed. The result remains unsigned.
Change-Id: I30e28bc05e07f5c28214c9c6d9d201cbbb183468
Reviewed-on: https://go-review.googlesource.com/c/go/+/731501
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/godefs.go | 2 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml | 4 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml | 32 | ||||
| -rw-r--r-- | src/simd/archsimd/ops_amd64.go | 43 |
4 files changed, 59 insertions, 22 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/godefs.go b/src/simd/archsimd/_gen/simdgen/godefs.go index 3ac74264e8..71cae158f7 100644 --- a/src/simd/archsimd/_gen/simdgen/godefs.go +++ b/src/simd/archsimd/_gen/simdgen/godefs.go @@ -142,7 +142,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error { outLanes := o.Out[0].Lanes if inLanes != nil && outLanes != nil && *inLanes < *outLanes { if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) && - !strings.HasSuffix(o.Go, "Concat") { + !strings.Contains(o.Go, "Concat") { o.Documentation += "\n// Results are packed to low elements in the returned vector, its upper elements are zeroed." } } diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml index 1e2a6a9b69..88e4840920 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml @@ -64,7 +64,7 @@ regexpTag: "convert" documentation: !string |- // NAME truncates element values to int16. -- go: "SaturateToInt16(Concat)?" +- go: "SaturateToInt16(Concat(Grouped)?)?" commutative: false regexpTag: "convert" documentation: !string |- @@ -109,7 +109,7 @@ regexpTag: "convert" documentation: !string |- // NAME truncates element values to uint16. -- go: "SaturateToUint16(Concat)?" +- go: "SaturateToUint16(Concat(Grouped)?)?" 
commutative: false regexpTag: "convert" documentation: !string |- diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml index 1d688b434d..f436be23ef 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml @@ -446,22 +446,48 @@ asm: "VPACKSSDW" addDoc: &satDocConcat !string |- + // The converted elements from x will be packed to the lower part of the result vector, + // the converted elements from y will be packed to the upper part of the result vector. + in: + - base: int + - base: int + out: + - base: int + bits: 128 +- go: SaturateToInt16ConcatGrouped + regexpTag: "convert" + asm: "VPACKSSDW" + addDoc: &satDocConcatGrouped + !string |- // With each 128-bit as a group: - // The converted group from the first input vector will be packed to the lower part of the result vector, - // the converted group from the second input vector will be packed to the upper part of the result vector. + // The converted elements from x will be packed to the lower part of the group in the result vector, + // the converted elements from y will be packed to the upper part of the group in the result vector. in: - base: int - base: int out: - base: int + bits: 256|512 - go: SaturateToUint16Concat regexpTag: "convert" asm: "VPACKUSDW" + addDoc: *satDocConcat in: + - base: int + - base: int + out: - base: uint - - base: uint + bits: 128 +- go: SaturateToUint16ConcatGrouped + regexpTag: "convert" + asm: "VPACKUSDW" + addDoc: *satDocConcatGrouped + in: + - base: int + - base: int out: - base: uint + bits: 256|512 # low-part only conversions. 
# uint8->uint16 diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index 304c0c0796..2a8a6bd4c6 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -5418,28 +5418,29 @@ func (x Int64x8) SaturateToInt16() Int16x8 /* SaturateToInt16Concat */ // SaturateToInt16Concat converts element values to int16 with signed saturation. -// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. +// The converted elements from x will be packed to the lower part of the result vector, +// the converted elements from y will be packed to the upper part of the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8 -// SaturateToInt16Concat converts element values to int16 with signed saturation. +/* SaturateToInt16ConcatGrouped */ + +// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation. // With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX2 -func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16 +func (x Int32x8) SaturateToInt16ConcatGrouped(y Int32x8) Int16x16 -// SaturateToInt16Concat converts element values to int16 with signed saturation. +// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation. 
// With each 128-bit as a group: -// The converted group from the first input vector will be packed to the lower part of the result vector, -// the converted group from the second input vector will be packed to the upper part of the result vector. +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKSSDW, CPU Feature: AVX512 -func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32 +func (x Int32x16) SaturateToInt16ConcatGrouped(y Int32x16) Int16x32 /* SaturateToInt32 */ @@ -5550,19 +5551,29 @@ func (x Uint64x8) SaturateToUint16() Uint16x8 /* SaturateToUint16Concat */ // SaturateToUint16Concat converts element values to uint16 with unsigned saturation. +// The converted elements from x will be packed to the lower part of the result vector, +// the converted elements from y will be packed to the upper part of the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX -func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8 +func (x Int32x4) SaturateToUint16Concat(y Int32x4) Uint16x8 -// SaturateToUint16Concat converts element values to uint16 with unsigned saturation. +/* SaturateToUint16ConcatGrouped */ + +// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation. +// With each 128-bit as a group: +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX2 -func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16 +func (x Int32x8) SaturateToUint16ConcatGrouped(y Int32x8) Uint16x16 -// SaturateToUint16Concat converts element values to uint16 with unsigned saturation. +// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation. 
+// With each 128-bit as a group: +// The converted elements from x will be packed to the lower part of the group in the result vector, +// the converted elements from y will be packed to the upper part of the group in the result vector. // // Asm: VPACKUSDW, CPU Feature: AVX512 -func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32 +func (x Int32x16) SaturateToUint16ConcatGrouped(y Int32x16) Uint16x32 /* SaturateToUint32 */ |
