diff options
| author | Junyang Shao <shaojunyang@google.com> | 2025-10-10 19:18:01 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2025-10-14 12:26:44 -0700 |
| commit | 416332dba285e45d57899eac73eb161cb2cd6bf4 (patch) | |
| tree | c98449d3fbdb77830d4d41b94f183cfebaf6320f /src/simd | |
| parent | 647c7901438a3f26153d0820ddfa5d07c486a487 (diff) | |
| download | go-416332dba285e45d57899eac73eb161cb2cd6bf4.tar.xz | |
[dev.simd] cmd/compile, simd: update DotProd to DotProduct
API naming changes.
This CL also removes AddDotProductPairsSaturated.
Change-Id: I02e6d45268704f3ed4eaf62f0ecb7dc936b42124
Reviewed-on: https://go-review.googlesource.com/c/go/+/710935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/simd')
| -rw-r--r-- | src/simd/_gen/simdgen/ops/MLOps/categories.yaml | 18 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/ops/MLOps/go.yaml | 20 | ||||
| -rw-r--r-- | src/simd/ops_amd64.go | 73 |
3 files changed, 41 insertions, 70 deletions
diff --git a/src/simd/_gen/simdgen/ops/MLOps/categories.yaml b/src/simd/_gen/simdgen/ops/MLOps/categories.yaml index 772a7b3cf6..0317b42c6a 100644 --- a/src/simd/_gen/simdgen/ops/MLOps/categories.yaml +++ b/src/simd/_gen/simdgen/ops/MLOps/categories.yaml @@ -1,38 +1,34 @@ !sum -- go: DotProdPairs +- go: DotProductPairs commutative: false documentation: !string |- // NAME multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use. -- go: DotProdPairsSaturated +- go: DotProductPairsSaturated commutative: false documentation: !string |- // NAME multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. -# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. -# - go: DotProdBroadcast +# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now. +# - go: DotProductBroadcast # commutative: true # # documentation: !string |- # // NAME multiplies all elements and broadcasts the sum. -- go: AddDotProdQuadruple +- go: AddDotProductQuadruple commutative: false documentation: !string |- // NAME performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdQuadrupleSaturated +- go: AddDotProductQuadrupleSaturated commutative: false documentation: !string |- // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z. -- go: AddDotProdPairs +- go: AddDotProductPairs commutative: false noTypes: "true" noGenericOps: "true" documentation: !string |- // NAME performs dot products on pairs of elements of y and z and then adds x. 
-- go: AddDotProdPairsSaturated - commutative: false - documentation: !string |- - // NAME performs dot products on pairs of elements of y and z and then adds x. - go: MulAdd commutative: false documentation: !string |- diff --git a/src/simd/_gen/simdgen/ops/MLOps/go.yaml b/src/simd/_gen/simdgen/ops/MLOps/go.yaml index 5c2009dcf8..162c47ea0e 100644 --- a/src/simd/_gen/simdgen/ops/MLOps/go.yaml +++ b/src/simd/_gen/simdgen/ops/MLOps/go.yaml @@ -1,5 +1,5 @@ !sum -- go: DotProdPairs +- go: DotProductPairs asm: VPMADDWD in: - &int @@ -10,7 +10,7 @@ - &int2 # The elemBits are different go: $t2 base: int -- go: DotProdPairsSaturated +- go: DotProductPairsSaturated asm: VPMADDUBSW in: - &uint @@ -23,7 +23,7 @@ overwriteElementBits: 8 out: - *int2 -# - go: DotProdBroadcast +# - go: DotProductBroadcast # asm: VDPP[SD] # in: # - &dpb_src @@ -33,7 +33,7 @@ # const: 127 # out: # - *dpb_src -- go: AddDotProdQuadruple +- go: AddDotProductQuadruple asm: "VPDPBUSD" operandOrder: "31" # switch operand 3 and 1 in: @@ -51,7 +51,7 @@ overwriteElementBits: 8 out: - *qdpa_acc -- go: AddDotProdQuadrupleSaturated +- go: AddDotProductQuadrupleSaturated asm: "VPDPBUSDS" operandOrder: "31" # switch operand 3 and 1 in: @@ -60,7 +60,7 @@ - *qdpa_src2 out: - *qdpa_acc -- go: AddDotProdPairs +- go: AddDotProductPairs asm: "VPDPWSSD" in: - &pdpa_acc @@ -77,14 +77,6 @@ overwriteElementBits: 16 out: - *pdpa_acc -- go: AddDotProdPairsSaturated - asm: "VPDPWSSDS" - in: - - *pdpa_acc - - *pdpa_src1 - - *pdpa_src2 - out: - - *pdpa_acc - go: MulAdd asm: "VFMADD213PS|VFMADD213PD" in: diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 8956c2e077..2331622361 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -314,56 +314,39 @@ func (x Uint64x4) Add(y Uint64x4) Uint64x4 // Asm: VPADDQ, CPU Feature: AVX512 func (x Uint64x8) Add(y Uint64x8) Uint64x8 -/* AddDotProdPairsSaturated */ +/* AddDotProductQuadruple */ -// AddDotProdPairsSaturated performs dot products on pairs of elements 
of y and z and then adds x. -// -// Asm: VPDPWSSDS, CPU Feature: AVXVNNI -func (x Int32x4) AddDotProdPairsSaturated(y Int16x8, z Int16x8) Int32x4 - -// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x. -// -// Asm: VPDPWSSDS, CPU Feature: AVXVNNI -func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8 - -// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI -func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16 - -/* AddDotProdQuadruple */ - -// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z. +// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x16) AddDotProdQuadruple(y Uint8x16, z Int32x4) Int32x4 +func (x Int8x16) AddDotProductQuadruple(y Uint8x16, z Int32x4) Int32x4 -// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z. +// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSD, CPU Feature: AVXVNNI -func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8 +func (x Int8x32) AddDotProductQuadruple(y Uint8x32, z Int32x8) Int32x8 -// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z. +// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSD, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16 +func (x Int8x64) AddDotProductQuadruple(y Uint8x64, z Int32x16) Int32x16 -/* AddDotProdQuadrupleSaturated */ +/* AddDotProductQuadrupleSaturated */ -// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. 
+// AddDotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x16) AddDotProdQuadrupleSaturated(y Uint8x16, z Int32x4) Int32x4 +func (x Int8x16) AddDotProductQuadrupleSaturated(y Uint8x16, z Int32x4) Int32x4 -// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. +// AddDotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSDS, CPU Feature: AVXVNNI -func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8 +func (x Int8x32) AddDotProductQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8 -// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. +// AddDotProductQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z. // // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI -func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16 +func (x Int8x64) AddDotProductQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16 /* AddPairs */ @@ -2143,45 +2126,45 @@ func (x Float64x4) Div(y Float64x4) Float64x4 // Asm: VDIVPD, CPU Feature: AVX512 func (x Float64x8) Div(y Float64x8) Float64x8 -/* DotProdPairs */ +/* DotProductPairs */ -// DotProdPairs multiplies the elements and add the pairs together, +// DotProductPairs multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. 
// // Asm: VPMADDWD, CPU Feature: AVX -func (x Int16x8) DotProdPairs(y Int16x8) Int32x4 +func (x Int16x8) DotProductPairs(y Int16x8) Int32x4 -// DotProdPairs multiplies the elements and add the pairs together, +// DotProductPairs multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX2 -func (x Int16x16) DotProdPairs(y Int16x16) Int32x8 +func (x Int16x16) DotProductPairs(y Int16x16) Int32x8 -// DotProdPairs multiplies the elements and add the pairs together, +// DotProductPairs multiplies the elements and add the pairs together, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDWD, CPU Feature: AVX512 -func (x Int16x32) DotProdPairs(y Int16x32) Int32x16 +func (x Int16x32) DotProductPairs(y Int16x32) Int32x16 -/* DotProdPairsSaturated */ +/* DotProductPairsSaturated */ -// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation, +// DotProductPairsSaturated multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX -func (x Uint8x16) DotProdPairsSaturated(y Int8x16) Int16x8 +func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8 -// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation, +// DotProductPairsSaturated multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. 
// // Asm: VPMADDUBSW, CPU Feature: AVX2 -func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16 +func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16 -// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation, +// DotProductPairsSaturated multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX512 -func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32 +func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32 /* Equal */ |
