aboutsummaryrefslogtreecommitdiff
path: root/src/simd
diff options
context:
space:
mode:
authorJunyang Shao <shaojunyang@google.com>2025-06-12 16:43:10 +0000
committerJunyang Shao <shaojunyang@google.com>2025-06-13 12:15:18 -0700
commitded6e0ac7140403480fa4539ed42ae8577eefbf9 (patch)
tree8321ea8c2e49cbff521d1f57624ea7fb563197b4 /src/simd
parent3df41c856e09cb0111604865a652f946379aad7a (diff)
downloadgo-ded6e0ac7140403480fa4539ed42ae8577eefbf9.tar.xz
[dev.simd] cmd/compile: add more dot products
This CL is generated by CL 680215. Change-Id: Ie085e65e0473a8e96170702d7265d379ec8812ba Reviewed-on: https://go-review.googlesource.com/c/go/+/681298 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
-rw-r--r--src/simd/stubs_amd64.go181
1 files changed, 181 insertions, 0 deletions
diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go
index 83edaf2270..49af32bc4f 100644
--- a/src/simd/stubs_amd64.go
+++ b/src/simd/stubs_amd64.go
@@ -766,6 +766,7 @@ func (x Float64x2) AndNot(y Float64x2) Float64x2
func (x Float64x2) Div(y Float64x2) Float64x2
// DotProdBroadcast multiplies all elements and broadcasts the sum.
+// Const Immediate = 127.
//
// Asm: VDPPD, CPU Feature: AVX
func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2
@@ -4437,6 +4438,26 @@ func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Int32x16
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
@@ -4518,6 +4539,26 @@ func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x4) PairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedPairDotProdAccumulate(y Int32x4, z Int32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Int32x4
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
@@ -4599,6 +4640,26 @@ func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX_VNNI
+func (x Int32x8) PairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedPairDotProdAccumulate(y Int32x8, z Int32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Int32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Int32x8
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512EVEX
@@ -5380,6 +5441,16 @@ func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16) Uint32x16
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
@@ -5456,6 +5527,16 @@ func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x4) UnsignedSignedQuadDotProdAccumulate(y Uint32x4, z Int32x4) Uint32x4
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDD, CPU Feature: AVX512EVEX
@@ -5532,6 +5613,16 @@ func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8
// Asm: VPXORD, CPU Feature: AVX512EVEX
func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX_VNNI
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX_VNNI
+func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint32x8, z Int32x8) Uint32x8
+
// Add adds corresponding elements of two vectors.
//
// Asm: VPADDQ, CPU Feature: AVX512EVEX
@@ -5991,6 +6082,96 @@ func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
// Asm: VPSUBB, CPU Feature: AVX512EVEX
func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int32x16, u Mask32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Int32x16
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int32x4, u Mask32x4) Int32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Int32x4
+
+// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+
+// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int32x8, u Mask32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Int32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int32x16, u Mask32x16) Uint32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int32x4, u Mask32x4) Uint32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+
+// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSD, CPU Feature: AVX512EVEX
+func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int32x8, u Mask32x8) Uint32x8
+
// CeilSuppressExceptionWithPrecision rounds elements up with specified precision, suppressing exceptions.
// Const Immediate = 10.
//