aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile
diff options
context:
space:
mode:
author David Chase <drchase@google.com> 2025-07-21 14:30:55 -0400
committer David Chase <drchase@google.com> 2025-08-01 13:09:29 -0700
commit acc1492b7d679914b485da0dd65d3faf202f4efa (patch)
tree fc2d3586da2dbc8b83325ae5b817ecef4932af2d /src/cmd/compile
parent a0b87a7478bb131efbbe9bb2ba6451d1b16ed0bf (diff)
download go-acc1492b7d679914b485da0dd65d3faf202f4efa.tar.xz
[dev.simd] cmd/compile: Generated code for AVX2 SIMD masked load/store
This adds to the change in the earlier dev.simd CL. Generated by arch/internal/simdgen CL 689276. Also includes one test for "it at least works once". Change-Id: I44a268cfc3bea06c5522ac2cfa04fe13a833e1dd Reviewed-on: https://go-review.googlesource.com/c/go/+/689335 Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r-- src/cmd/compile/internal/ssagen/simdintrinsics.go 24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 8b3b08f886..cf2e7fc676 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -2132,6 +2132,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x4.Store", simdStore(), sys.AMD64)
addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64)
addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64)
+ addF(simdPackage, "LoadMaskedFloat32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Float32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedFloat32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Float32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedFloat64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Float64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+ addF(simdPackage, "LoadMaskedFloat64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Float64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+ addF(simdPackage, "LoadMaskedInt32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Int32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedInt32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Int32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedInt64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Int64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+ addF(simdPackage, "LoadMaskedInt64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Int64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+ addF(simdPackage, "LoadMaskedUint32x4", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Uint32x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedUint32x8", simdMaskedLoad(ssa.OpLoadMasked32), sys.AMD64)
+ addF(simdPackage, "Uint32x8.StoreMasked", simdMaskedStore(ssa.OpStoreMasked32), sys.AMD64)
+ addF(simdPackage, "LoadMaskedUint64x2", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Uint64x2.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
+ addF(simdPackage, "LoadMaskedUint64x4", simdMaskedLoad(ssa.OpLoadMasked64), sys.AMD64)
+ addF(simdPackage, "Uint64x4.StoreMasked", simdMaskedStore(ssa.OpStoreMasked64), sys.AMD64)
addF(simdPackage, "Mask8x16.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x16.AsMask8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)