aboutsummaryrefslogtreecommitdiff
path: root/src/simd/archsimd/_gen
diff options
context:
space:
mode:
author Cherry Mui <cherryyz@google.com> 2026-01-05 12:56:08 -0500
committer Cherry Mui <cherryyz@google.com> 2026-01-05 12:22:51 -0800
commit 9b2e3b9a02bd8872bdbf5c6086674fa6b4bc8ef9 (patch)
tree 51a43dfc235315fbbc88226f0e52ab77cb628e46 /src/simd/archsimd/_gen
parent f8ee0f84753b22254d217bf28ce8ecca7db7025c (diff)
download go-9b2e3b9a02bd8872bdbf5c6086674fa6b4bc8ef9.tar.xz
simd/archsimd: use V(P)MOVMSK for mask ToBits if possible
VPMOVMSKB, VMOVMSKPS, and VMOVMSKPD move AVX1/2-style masks to integer registers, similar to VPMOV[BWDQ]2M (which moves to mask registers). The former is available on AVX1/2, the latter requires AVX512. So use the former if it is supported, i.e. for 128- and 256-bit vectors with 8-, 32-, and 64-bit elements (16-bit elements always require AVX512). Change-Id: I972195116617ed2faaf95cee5cd6b250e671496c Reviewed-on: https://go-review.googlesource.com/c/go/+/734060 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'src/simd/archsimd/_gen')
-rw-r--r-- src/simd/archsimd/_gen/simdgen/gen_simdTypes.go 29
1 files changed, 28 insertions, 1 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
index d8c4481296..dd3a75eb44 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
@@ -93,6 +93,33 @@ func (x simdType) MaskedStoreDoc() string {
}
}
+// ToBitsDoc returns the "// Asm: ..., CPU Features: ..." comment line that
+// documents the instruction and CPU feature behind ToBits for this type.
+// KMOV (AVX512) is reported for 512-bit vectors and 16-bit elements; the
+// remaining sizes report the V(P)MOVMSK instruction for their element width.
+func (x simdType) ToBitsDoc() string {
+	const layout = "// Asm: %s, CPU Features: %s"
+	if x.Size == 512 || x.ElemBits() == 16 {
+		// 512-bit vectors and 16-bit elements are documented as AVX512 KMOV.
+		return fmt.Sprintf("// Asm: KMOV%s, CPU Features: AVX512", x.IntelSizeSuffix())
+	}
+	// What remains is 128/256-bit vectors with 8-, 32-, or 64-bit elements.
+	switch x.ElemBits() {
+	case 8:
+		// Only the 256-bit byte form is documented as needing AVX2.
+		if x.Size == 256 {
+			return fmt.Sprintf(layout, "VPMOVMSKB", "AVX2")
+		}
+		return fmt.Sprintf(layout, "VPMOVMSKB", "AVX")
+	case 32:
+		return fmt.Sprintf(layout, "VMOVMSKPS", "AVX")
+	case 64:
+		return fmt.Sprintf(layout, "VMOVMSKPD", "AVX")
+	}
+	// No other element width is expected here.
+	panic("unexpected ElemBits")
+}
+
func compareSimdTypes(x, y simdType) int {
// "vreg" then "mask"
if c := -compareNatural(x.Type, y.Type); c != 0 {
@@ -210,7 +237,7 @@ func {{.Name}}FromBits(y uint{{.LanesContainer}}) {{.Name}}
// Only the lower {{.Lanes}} bits of y are used.
{{- end}}
//
-// Asm: KMOV{{.IntelSizeSuffix}}, CPU Features: AVX512
+{{.ToBitsDoc}}
func (x {{.Name}}) ToBits() uint{{.LanesContainer}}
`