aboutsummaryrefslogtreecommitdiff
path: root/src/simd
diff options
context:
space:
mode:
author Cherry Mui <cherryyz@google.com> 2025-12-19 15:21:15 -0500
committer Cherry Mui <cherryyz@google.com> 2025-12-19 14:39:26 -0800
commit 42cda7c1dfcc1ab109766f2016efe2331b3d0aab (patch)
tree 43ffea03bdb49895af96c105044b5ffc61b56de1 /src/simd
parent baa0ae3aaacfcef6ae04beba78a2d2b06776e423 (diff)
download go-42cda7c1dfcc1ab109766f2016efe2331b3d0aab.tar.xz
simd/archsimd: add Grouped for 256- and 512-bit SaturateTo(U)Int16Concat, and fix type
They operate on 128-bit groups, so name them Grouped to be clear, and consistent with other grouped operations. Reword the documentation, mention the grouping only for grouped versions. Also, SaturateToUint16Concat(Grouped) is a signed int32 to unsigned uint16 saturated conversion. The receiver and the parameter should be signed. The result remains unsigned. Change-Id: I30e28bc05e07f5c28214c9c6d9d201cbbb183468 Reviewed-on: https://go-review.googlesource.com/c/go/+/731501 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
-rw-r--r-- src/simd/archsimd/_gen/simdgen/godefs.go 2
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml 4
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml 32
-rw-r--r-- src/simd/archsimd/ops_amd64.go 43
4 files changed, 59 insertions, 22 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/godefs.go b/src/simd/archsimd/_gen/simdgen/godefs.go
index 3ac74264e8..71cae158f7 100644
--- a/src/simd/archsimd/_gen/simdgen/godefs.go
+++ b/src/simd/archsimd/_gen/simdgen/godefs.go
@@ -142,7 +142,7 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
outLanes := o.Out[0].Lanes
if inLanes != nil && outLanes != nil && *inLanes < *outLanes {
if (strings.Contains(o.Go, "Saturate") || strings.Contains(o.Go, "Truncate")) &&
- !strings.HasSuffix(o.Go, "Concat") {
+ !strings.Contains(o.Go, "Concat") {
o.Documentation += "\n// Results are packed to low elements in the returned vector, its upper elements are zeroed."
}
}
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
index 1e2a6a9b69..88e4840920 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
@@ -64,7 +64,7 @@
regexpTag: "convert"
documentation: !string |-
// NAME truncates element values to int16.
-- go: "SaturateToInt16(Concat)?"
+- go: "SaturateToInt16(Concat(Grouped)?)?"
commutative: false
regexpTag: "convert"
documentation: !string |-
@@ -109,7 +109,7 @@
regexpTag: "convert"
documentation: !string |-
// NAME truncates element values to uint16.
-- go: "SaturateToUint16(Concat)?"
+- go: "SaturateToUint16(Concat(Grouped)?)?"
commutative: false
regexpTag: "convert"
documentation: !string |-
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
index 1d688b434d..f436be23ef 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
@@ -446,22 +446,48 @@
asm: "VPACKSSDW"
addDoc: &satDocConcat
!string |-
+ // The converted elements from x will be packed to the lower part of the result vector,
+ // the converted elements from y will be packed to the upper part of the result vector.
+ in:
+ - base: int
+ - base: int
+ out:
+ - base: int
+ bits: 128
+- go: SaturateToInt16ConcatGrouped
+ regexpTag: "convert"
+ asm: "VPACKSSDW"
+ addDoc: &satDocConcatGrouped
+ !string |-
// With each 128-bit as a group:
- // The converted group from the first input vector will be packed to the lower part of the result vector,
- // the converted group from the second input vector will be packed to the upper part of the result vector.
+ // The converted elements from x will be packed to the lower part of the group in the result vector,
+ // the converted elements from y will be packed to the upper part of the group in the result vector.
in:
- base: int
- base: int
out:
- base: int
+ bits: 256|512
- go: SaturateToUint16Concat
regexpTag: "convert"
asm: "VPACKUSDW"
+ addDoc: *satDocConcat
in:
+ - base: int
+ - base: int
+ out:
- base: uint
- - base: uint
+ bits: 128
+- go: SaturateToUint16ConcatGrouped
+ regexpTag: "convert"
+ asm: "VPACKUSDW"
+ addDoc: *satDocConcatGrouped
+ in:
+ - base: int
+ - base: int
out:
- base: uint
+ bits: 256|512
# low-part only conversions.
# uint8->uint16
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index 304c0c0796..2a8a6bd4c6 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -5418,28 +5418,29 @@ func (x Int64x8) SaturateToInt16() Int16x8
/* SaturateToInt16Concat */
// SaturateToInt16Concat converts element values to int16 with signed saturation.
-// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
+// The converted elements from x will be packed to the lower part of the result vector,
+// the converted elements from y will be packed to the upper part of the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX
func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8
-// SaturateToInt16Concat converts element values to int16 with signed saturation.
+/* SaturateToInt16ConcatGrouped */
+
+// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX2
-func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16
+func (x Int32x8) SaturateToInt16ConcatGrouped(y Int32x8) Int16x16
-// SaturateToInt16Concat converts element values to int16 with signed saturation.
+// SaturateToInt16ConcatGrouped converts element values to int16 with signed saturation.
// With each 128-bit as a group:
-// The converted group from the first input vector will be packed to the lower part of the result vector,
-// the converted group from the second input vector will be packed to the upper part of the result vector.
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKSSDW, CPU Feature: AVX512
-func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32
+func (x Int32x16) SaturateToInt16ConcatGrouped(y Int32x16) Int16x32
/* SaturateToInt32 */
@@ -5550,19 +5551,29 @@ func (x Uint64x8) SaturateToUint16() Uint16x8
/* SaturateToUint16Concat */
// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
+// The converted elements from x will be packed to the lower part of the result vector,
+// the converted elements from y will be packed to the upper part of the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX
-func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8
+func (x Int32x4) SaturateToUint16Concat(y Int32x4) Uint16x8
-// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
+/* SaturateToUint16ConcatGrouped */
+
+// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
+// With each 128-bit as a group:
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX2
-func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16
+func (x Int32x8) SaturateToUint16ConcatGrouped(y Int32x8) Uint16x16
-// SaturateToUint16Concat converts element values to uint16 with unsigned saturation.
+// SaturateToUint16ConcatGrouped converts element values to uint16 with unsigned saturation.
+// With each 128-bit as a group:
+// The converted elements from x will be packed to the lower part of the group in the result vector,
+// the converted elements from y will be packed to the upper part of the group in the result vector.
//
// Asm: VPACKUSDW, CPU Feature: AVX512
-func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32
+func (x Int32x16) SaturateToUint16ConcatGrouped(y Int32x16) Uint16x32
/* SaturateToUint32 */