diff options
| author | Junyang Shao <shaojunyang@google.com> | 2025-11-13 17:07:16 +0000 |
|---|---|---|
| committer | Junyang Shao <shaojunyang@google.com> | 2025-11-17 13:37:22 -0800 |
| commit | e4d94842207a7f29fb473ecece2acdc5a2a207f7 (patch) | |
| tree | 637dece79df1216aa20ab6d12e83c9d43860be46 /src/cmd | |
| parent | d7a0c45642fef106b5443aa16937fd4bffb51d12 (diff) | |
| download | go-e4d94842207a7f29fb473ecece2acdc5a2a207f7.tar.xz | |
[dev.simd] cmd/compile: fix unstable output
This CL fixes an error left by CL 718160.
Change-Id: I442ea59bc1ff0dda2914d1858dd5ebe93e2818dc
Reviewed-on: https://go-review.googlesource.com/c/go/+/720281
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/amd64/simdssa.go | 252 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 972 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go | 189 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 1458 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 5393 |
5 files changed, 5293 insertions, 2971 deletions
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 0abcd95e37..9425b42d41 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -42,22 +42,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBROADCASTW512, ssa.OpAMD64VPBROADCASTD512, ssa.OpAMD64VPBROADCASTQ512, - ssa.OpAMD64VPMOVWB128, + ssa.OpAMD64VPMOVWB128_128, + ssa.OpAMD64VPMOVWB128_256, ssa.OpAMD64VPMOVWB256, - ssa.OpAMD64VPMOVDB128, - ssa.OpAMD64VPMOVQB128, - ssa.OpAMD64VPMOVSWB128, + ssa.OpAMD64VPMOVDB128_128, + ssa.OpAMD64VPMOVDB128_256, + ssa.OpAMD64VPMOVDB128_512, + ssa.OpAMD64VPMOVQB128_128, + ssa.OpAMD64VPMOVQB128_256, + ssa.OpAMD64VPMOVQB128_512, + ssa.OpAMD64VPMOVSWB128_128, + ssa.OpAMD64VPMOVSWB128_256, ssa.OpAMD64VPMOVSWB256, - ssa.OpAMD64VPMOVSDB128, - ssa.OpAMD64VPMOVSQB128, + ssa.OpAMD64VPMOVSDB128_128, + ssa.OpAMD64VPMOVSDB128_256, + ssa.OpAMD64VPMOVSDB128_512, + ssa.OpAMD64VPMOVSQB128_128, + ssa.OpAMD64VPMOVSQB128_256, + ssa.OpAMD64VPMOVSQB128_512, ssa.OpAMD64VPMOVSXBW256, ssa.OpAMD64VPMOVSXBW512, - ssa.OpAMD64VPMOVDW128, + ssa.OpAMD64VPMOVDW128_128, + ssa.OpAMD64VPMOVDW128_256, ssa.OpAMD64VPMOVDW256, - ssa.OpAMD64VPMOVQW128, - ssa.OpAMD64VPMOVSDW128, + ssa.OpAMD64VPMOVQW128_128, + ssa.OpAMD64VPMOVQW128_256, + ssa.OpAMD64VPMOVQW128_512, + ssa.OpAMD64VPMOVSDW128_128, + ssa.OpAMD64VPMOVSDW128_256, ssa.OpAMD64VPMOVSDW256, - ssa.OpAMD64VPMOVSQW128, + ssa.OpAMD64VPMOVSQW128_128, + ssa.OpAMD64VPMOVSQW128_256, + ssa.OpAMD64VPMOVSQW128_512, ssa.OpAMD64VPMOVSXBW128, ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, @@ -65,9 +81,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBD512, ssa.OpAMD64VPMOVSXWD256, ssa.OpAMD64VPMOVSXWD512, - ssa.OpAMD64VPMOVQD128, + ssa.OpAMD64VPMOVQD128_128, + ssa.OpAMD64VPMOVQD128_256, ssa.OpAMD64VPMOVQD256, - ssa.OpAMD64VPMOVSQD128, + ssa.OpAMD64VPMOVSQD128_128, + ssa.OpAMD64VPMOVSQD128_256, 
ssa.OpAMD64VPMOVSQD256, ssa.OpAMD64VPMOVSXBD128, ssa.OpAMD64VPMOVSXWD128, @@ -80,15 +98,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQ128, ssa.OpAMD64VPMOVSXBQ256, ssa.OpAMD64VPMOVSXBQ512, - ssa.OpAMD64VPMOVUSWB128, + ssa.OpAMD64VPMOVUSWB128_128, + ssa.OpAMD64VPMOVUSWB128_256, ssa.OpAMD64VPMOVUSWB256, - ssa.OpAMD64VPMOVUSDB128, - ssa.OpAMD64VPMOVUSQB128, + ssa.OpAMD64VPMOVUSDB128_128, + ssa.OpAMD64VPMOVUSDB128_256, + ssa.OpAMD64VPMOVUSDB128_512, + ssa.OpAMD64VPMOVUSQB128_128, + ssa.OpAMD64VPMOVUSQB128_256, + ssa.OpAMD64VPMOVUSQB128_512, ssa.OpAMD64VPMOVZXBW256, ssa.OpAMD64VPMOVZXBW512, - ssa.OpAMD64VPMOVUSDW128, + ssa.OpAMD64VPMOVUSDW128_128, + ssa.OpAMD64VPMOVUSDW128_256, ssa.OpAMD64VPMOVUSDW256, - ssa.OpAMD64VPMOVUSQW128, + ssa.OpAMD64VPMOVUSQW128_128, + ssa.OpAMD64VPMOVUSQW128_256, + ssa.OpAMD64VPMOVUSQW128_512, ssa.OpAMD64VPMOVZXBW128, ssa.OpAMD64VCVTPS2UDQ128, ssa.OpAMD64VCVTPS2UDQ256, @@ -96,7 +122,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBD512, ssa.OpAMD64VPMOVZXWD256, ssa.OpAMD64VPMOVZXWD512, - ssa.OpAMD64VPMOVUSQD128, + ssa.OpAMD64VPMOVUSQD128_128, + ssa.OpAMD64VPMOVUSQD128_256, ssa.OpAMD64VPMOVUSQD256, ssa.OpAMD64VPMOVZXBD128, ssa.OpAMD64VPMOVZXWD128, @@ -791,22 +818,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, - ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128, - ssa.OpAMD64VPMOVQBMasked128, - ssa.OpAMD64VPMOVSWBMasked128, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, ssa.OpAMD64VPMOVSWBMasked256, - 
ssa.OpAMD64VPMOVSDBMasked128, - ssa.OpAMD64VPMOVSQBMasked128, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128, - ssa.OpAMD64VPMOVSDWMasked128, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, @@ -814,9 +857,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, @@ -829,15 +874,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128, ssa.OpAMD64VPMOVSXBQMasked256, ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128, - ssa.OpAMD64VPMOVUSQBMasked128, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, 
+ ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, - ssa.OpAMD64VPMOVUSQWMasked128, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, @@ -845,7 +898,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, @@ -2266,22 +2320,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPDMasked128Merging, ssa.OpAMD64VREDUCEPDMasked256Merging, ssa.OpAMD64VREDUCEPDMasked512Merging, - ssa.OpAMD64VPMOVWBMasked128Merging, + ssa.OpAMD64VPMOVWBMasked128_128Merging, + ssa.OpAMD64VPMOVWBMasked128_256Merging, ssa.OpAMD64VPMOVWBMasked256Merging, - ssa.OpAMD64VPMOVDBMasked128Merging, - ssa.OpAMD64VPMOVQBMasked128Merging, - ssa.OpAMD64VPMOVSWBMasked128Merging, + ssa.OpAMD64VPMOVDBMasked128_128Merging, + ssa.OpAMD64VPMOVDBMasked128_256Merging, + ssa.OpAMD64VPMOVDBMasked128_512Merging, + ssa.OpAMD64VPMOVQBMasked128_128Merging, + ssa.OpAMD64VPMOVQBMasked128_256Merging, + ssa.OpAMD64VPMOVQBMasked128_512Merging, + ssa.OpAMD64VPMOVSWBMasked128_128Merging, + ssa.OpAMD64VPMOVSWBMasked128_256Merging, ssa.OpAMD64VPMOVSWBMasked256Merging, - ssa.OpAMD64VPMOVSDBMasked128Merging, - ssa.OpAMD64VPMOVSQBMasked128Merging, + ssa.OpAMD64VPMOVSDBMasked128_128Merging, + ssa.OpAMD64VPMOVSDBMasked128_256Merging, + ssa.OpAMD64VPMOVSDBMasked128_512Merging, + ssa.OpAMD64VPMOVSQBMasked128_128Merging, + 
ssa.OpAMD64VPMOVSQBMasked128_256Merging, + ssa.OpAMD64VPMOVSQBMasked128_512Merging, ssa.OpAMD64VPMOVSXBWMasked256Merging, ssa.OpAMD64VPMOVSXBWMasked512Merging, - ssa.OpAMD64VPMOVDWMasked128Merging, + ssa.OpAMD64VPMOVDWMasked128_128Merging, + ssa.OpAMD64VPMOVDWMasked128_256Merging, ssa.OpAMD64VPMOVDWMasked256Merging, - ssa.OpAMD64VPMOVQWMasked128Merging, - ssa.OpAMD64VPMOVSDWMasked128Merging, + ssa.OpAMD64VPMOVQWMasked128_128Merging, + ssa.OpAMD64VPMOVQWMasked128_256Merging, + ssa.OpAMD64VPMOVQWMasked128_512Merging, + ssa.OpAMD64VPMOVSDWMasked128_128Merging, + ssa.OpAMD64VPMOVSDWMasked128_256Merging, ssa.OpAMD64VPMOVSDWMasked256Merging, - ssa.OpAMD64VPMOVSQWMasked128Merging, + ssa.OpAMD64VPMOVSQWMasked128_128Merging, + ssa.OpAMD64VPMOVSQWMasked128_256Merging, + ssa.OpAMD64VPMOVSQWMasked128_512Merging, ssa.OpAMD64VPMOVSXBWMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked256Merging, @@ -2289,9 +2359,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512Merging, ssa.OpAMD64VPMOVSXWDMasked256Merging, ssa.OpAMD64VPMOVSXWDMasked512Merging, - ssa.OpAMD64VPMOVQDMasked128Merging, + ssa.OpAMD64VPMOVQDMasked128_128Merging, + ssa.OpAMD64VPMOVQDMasked128_256Merging, ssa.OpAMD64VPMOVQDMasked256Merging, - ssa.OpAMD64VPMOVSQDMasked128Merging, + ssa.OpAMD64VPMOVSQDMasked128_128Merging, + ssa.OpAMD64VPMOVSQDMasked128_256Merging, ssa.OpAMD64VPMOVSQDMasked256Merging, ssa.OpAMD64VPMOVSXBDMasked128Merging, ssa.OpAMD64VPMOVSXWDMasked128Merging, @@ -2304,15 +2376,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128Merging, ssa.OpAMD64VPMOVSXBQMasked256Merging, ssa.OpAMD64VPMOVSXBQMasked512Merging, - ssa.OpAMD64VPMOVUSWBMasked128Merging, + ssa.OpAMD64VPMOVUSWBMasked128_128Merging, + ssa.OpAMD64VPMOVUSWBMasked128_256Merging, ssa.OpAMD64VPMOVUSWBMasked256Merging, - ssa.OpAMD64VPMOVUSDBMasked128Merging, - ssa.OpAMD64VPMOVUSQBMasked128Merging, + 
ssa.OpAMD64VPMOVUSDBMasked128_128Merging, + ssa.OpAMD64VPMOVUSDBMasked128_256Merging, + ssa.OpAMD64VPMOVUSDBMasked128_512Merging, + ssa.OpAMD64VPMOVUSQBMasked128_128Merging, + ssa.OpAMD64VPMOVUSQBMasked128_256Merging, + ssa.OpAMD64VPMOVUSQBMasked128_512Merging, ssa.OpAMD64VPMOVZXBWMasked256Merging, ssa.OpAMD64VPMOVZXBWMasked512Merging, - ssa.OpAMD64VPMOVUSDWMasked128Merging, + ssa.OpAMD64VPMOVUSDWMasked128_128Merging, + ssa.OpAMD64VPMOVUSDWMasked128_256Merging, ssa.OpAMD64VPMOVUSDWMasked256Merging, - ssa.OpAMD64VPMOVUSQWMasked128Merging, + ssa.OpAMD64VPMOVUSQWMasked128_128Merging, + ssa.OpAMD64VPMOVUSQWMasked128_256Merging, + ssa.OpAMD64VPMOVUSQWMasked128_512Merging, ssa.OpAMD64VPMOVZXBWMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked256Merging, @@ -2320,7 +2400,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512Merging, ssa.OpAMD64VPMOVZXWDMasked256Merging, ssa.OpAMD64VPMOVZXWDMasked512Merging, - ssa.OpAMD64VPMOVUSQDMasked128Merging, + ssa.OpAMD64VPMOVUSQDMasked128_128Merging, + ssa.OpAMD64VPMOVUSQDMasked128_256Merging, ssa.OpAMD64VPMOVUSQDMasked256Merging, ssa.OpAMD64VPMOVZXBDMasked128Merging, ssa.OpAMD64VPMOVZXWDMasked128Merging, @@ -2592,22 +2673,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, - ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128, - ssa.OpAMD64VPMOVQBMasked128, - ssa.OpAMD64VPMOVSWBMasked128, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, ssa.OpAMD64VPMOVSWBMasked256, - ssa.OpAMD64VPMOVSDBMasked128, - 
ssa.OpAMD64VPMOVSQBMasked128, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128, - ssa.OpAMD64VPMOVSDWMasked128, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, ssa.OpAMD64VPACKSSDWMasked128, ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPACKSSDWMasked256, @@ -2624,9 +2721,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, @@ -2639,15 +2738,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128, ssa.OpAMD64VPMOVSXBQMasked256, ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128, - ssa.OpAMD64VPMOVUSQBMasked128, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, + 
ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, - ssa.OpAMD64VPMOVUSQWMasked128, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, ssa.OpAMD64VPACKUSDWMasked128, ssa.OpAMD64VPACKUSDWMasked128load, ssa.OpAMD64VPACKUSDWMasked256, @@ -2664,7 +2771,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 8332af2738..7ba970ca42 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -218,38 +218,38 @@ (CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask)) (CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask)) (CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask)) -(ConvertToInt8Int16x8 ...) => (VPMOVWB128 ...) -(ConvertToInt8Int16x16 ...) => (VPMOVWB128 ...) +(ConvertToInt8Int16x8 ...) => (VPMOVWB128_128 ...) +(ConvertToInt8Int16x16 ...) => (VPMOVWB128_256 ...) (ConvertToInt8Int16x32 ...) => (VPMOVWB256 ...) -(ConvertToInt8Int32x4 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int32x8 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int32x16 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int64x2 ...) => (VPMOVQB128 ...) -(ConvertToInt8Int64x4 ...) => (VPMOVQB128 ...) -(ConvertToInt8Int64x8 ...) => (VPMOVQB128 ...) 
-(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128 ...) -(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128 ...) +(ConvertToInt8Int32x4 ...) => (VPMOVDB128_128 ...) +(ConvertToInt8Int32x8 ...) => (VPMOVDB128_256 ...) +(ConvertToInt8Int32x16 ...) => (VPMOVDB128_512 ...) +(ConvertToInt8Int64x2 ...) => (VPMOVQB128_128 ...) +(ConvertToInt8Int64x4 ...) => (VPMOVQB128_256 ...) +(ConvertToInt8Int64x8 ...) => (VPMOVQB128_512 ...) +(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128_128 ...) +(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128_256 ...) (ConvertToInt8SaturatedInt16x32 ...) => (VPMOVSWB256 ...) -(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128 ...) -(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128 ...) -(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128 ...) +(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128_128 ...) +(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128_256 ...) +(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128_512 ...) +(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128_128 ...) +(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128_256 ...) +(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128_512 ...) (ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...) (ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...) -(ConvertToInt16Int32x4 ...) => (VPMOVDW128 ...) -(ConvertToInt16Int32x8 ...) => (VPMOVDW128 ...) +(ConvertToInt16Int32x4 ...) => (VPMOVDW128_128 ...) +(ConvertToInt16Int32x8 ...) => (VPMOVDW128_256 ...) (ConvertToInt16Int32x16 ...) => (VPMOVDW256 ...) -(ConvertToInt16Int64x2 ...) => (VPMOVQW128 ...) -(ConvertToInt16Int64x4 ...) => (VPMOVQW128 ...) -(ConvertToInt16Int64x8 ...) => (VPMOVQW128 ...) -(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128 ...) -(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128 ...) +(ConvertToInt16Int64x2 ...) 
=> (VPMOVQW128_128 ...) +(ConvertToInt16Int64x4 ...) => (VPMOVQW128_256 ...) +(ConvertToInt16Int64x8 ...) => (VPMOVQW128_512 ...) +(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128_128 ...) +(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128_256 ...) (ConvertToInt16SaturatedInt32x16 ...) => (VPMOVSDW256 ...) -(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128 ...) -(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128 ...) -(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128 ...) +(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128_128 ...) +(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128_256 ...) +(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128_512 ...) (ConvertToInt16SaturatedPackedInt32x4 ...) => (VPACKSSDW128 ...) (ConvertToInt16SaturatedPackedInt32x8 ...) => (VPACKSSDW256 ...) (ConvertToInt16SaturatedPackedInt32x16 ...) => (VPACKSSDW512 ...) @@ -260,11 +260,11 @@ (ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...) (ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...) (ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...) -(ConvertToInt32Int64x2 ...) => (VPMOVQD128 ...) -(ConvertToInt32Int64x4 ...) => (VPMOVQD128 ...) +(ConvertToInt32Int64x2 ...) => (VPMOVQD128_128 ...) +(ConvertToInt32Int64x4 ...) => (VPMOVQD128_256 ...) (ConvertToInt32Int64x8 ...) => (VPMOVQD256 ...) -(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128 ...) -(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128 ...) +(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128_128 ...) +(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128_256 ...) (ConvertToInt32SaturatedInt64x8 ...) => (VPMOVSQD256 ...) (ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) (ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) @@ -277,38 +277,38 @@ (ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) (ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) (ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) -(ConvertToUint8Uint16x8 ...) => (VPMOVWB128 ...) -(ConvertToUint8Uint16x16 ...) => (VPMOVWB128 ...) 
+(ConvertToUint8Uint16x8 ...) => (VPMOVWB128_128 ...) +(ConvertToUint8Uint16x16 ...) => (VPMOVWB128_256 ...) (ConvertToUint8Uint16x32 ...) => (VPMOVWB256 ...) -(ConvertToUint8Uint32x4 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint32x8 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint32x16 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint64x2 ...) => (VPMOVQB128 ...) -(ConvertToUint8Uint64x4 ...) => (VPMOVQB128 ...) -(ConvertToUint8Uint64x8 ...) => (VPMOVQB128 ...) -(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128 ...) -(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128 ...) +(ConvertToUint8Uint32x4 ...) => (VPMOVDB128_128 ...) +(ConvertToUint8Uint32x8 ...) => (VPMOVDB128_256 ...) +(ConvertToUint8Uint32x16 ...) => (VPMOVDB128_512 ...) +(ConvertToUint8Uint64x2 ...) => (VPMOVQB128_128 ...) +(ConvertToUint8Uint64x4 ...) => (VPMOVQB128_256 ...) +(ConvertToUint8Uint64x8 ...) => (VPMOVQB128_512 ...) +(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128_128 ...) +(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128_256 ...) (ConvertToUint8SaturatedUint16x32 ...) => (VPMOVUSWB256 ...) -(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128 ...) -(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128 ...) -(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128 ...) +(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128_128 ...) +(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128_256 ...) +(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128_512 ...) +(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128_128 ...) +(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128_256 ...) +(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128_512 ...) (ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) (ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) -(ConvertToUint16Uint32x4 ...) 
=> (VPMOVDW128 ...) -(ConvertToUint16Uint32x8 ...) => (VPMOVDW128 ...) +(ConvertToUint16Uint32x4 ...) => (VPMOVDW128_128 ...) +(ConvertToUint16Uint32x8 ...) => (VPMOVDW128_256 ...) (ConvertToUint16Uint32x16 ...) => (VPMOVDW256 ...) -(ConvertToUint16Uint64x2 ...) => (VPMOVQW128 ...) -(ConvertToUint16Uint64x4 ...) => (VPMOVQW128 ...) -(ConvertToUint16Uint64x8 ...) => (VPMOVQW128 ...) -(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128 ...) -(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128 ...) +(ConvertToUint16Uint64x2 ...) => (VPMOVQW128_128 ...) +(ConvertToUint16Uint64x4 ...) => (VPMOVQW128_256 ...) +(ConvertToUint16Uint64x8 ...) => (VPMOVQW128_512 ...) +(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128_128 ...) +(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128_256 ...) (ConvertToUint16SaturatedUint32x16 ...) => (VPMOVUSDW256 ...) -(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128 ...) -(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128 ...) -(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128 ...) +(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128_128 ...) +(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128_256 ...) +(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128_512 ...) (ConvertToUint16SaturatedPackedUint32x4 ...) => (VPACKUSDW128 ...) (ConvertToUint16SaturatedPackedUint32x8 ...) => (VPACKUSDW256 ...) (ConvertToUint16SaturatedPackedUint32x16 ...) => (VPACKUSDW512 ...) @@ -319,11 +319,11 @@ (ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) (ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) (ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) -(ConvertToUint32Uint64x2 ...) => (VPMOVQD128 ...) -(ConvertToUint32Uint64x4 ...) => (VPMOVQD128 ...) +(ConvertToUint32Uint64x2 ...) => (VPMOVQD128_128 ...) +(ConvertToUint32Uint64x4 ...) => (VPMOVQD128_256 ...) (ConvertToUint32Uint64x8 ...) => (VPMOVQD256 ...) -(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128 ...) -(ConvertToUint32SaturatedUint64x4 ...) 
=> (VPMOVUSQD128 ...) +(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128_128 ...) +(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128_256 ...) (ConvertToUint32SaturatedUint64x8 ...) => (VPMOVUSQD256 ...) (ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) (ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) @@ -1423,22 +1423,38 @@ (VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) => (VREDUCEPDMasked128 [a] x mask) (VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) => (VREDUCEPDMasked256 [a] x mask) (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask) -(VMOVDQU16Masked128 (VPMOVWB128 x) mask) => (VPMOVWBMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) => (VPMOVWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) => (VPMOVWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVWB256 x) mask) => (VPMOVWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVDB128 x) mask) => (VPMOVDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVQB128 x) mask) => (VPMOVQBMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVSWB128 x) mask) => (VPMOVSWBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) => (VPMOVDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) => (VPMOVDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) => (VPMOVQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) => (VPMOVQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512 x mask) +(VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) => (VPMOVSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) => (VPMOVSWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVSDB128_128 
x) mask) => (VPMOVSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) => (VPMOVSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask) (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) => (VPMOVSXBWMasked256 x mask) (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVDW128 x) mask) => (VPMOVDWMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) => (VPMOVDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) => (VPMOVDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVDW256 x) mask) => (VPMOVDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVQW128 x) mask) => (VPMOVQWMasked128 x mask) -(VMOVDQU32Masked128 (VPMOVSDW128 x) mask) => (VPMOVSDWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) => (VPMOVQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) => (VPMOVQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512 x mask) +(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) => (VPMOVSQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) => (VPMOVSQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512 x mask) (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask) (VMOVDQU32Masked512 
(VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask) @@ -1449,9 +1465,11 @@ (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask) (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) => (VPMOVSXWDMasked256 x mask) (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask) -(VMOVDQU64Masked128 (VPMOVQD128 x) mask) => (VPMOVQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) => (VPMOVQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) => (VPMOVQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVQD256 x) mask) => (VPMOVQDMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQD128 x) mask) => (VPMOVSQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask) (VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) => (VPMOVSXBDMasked128 x mask) (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) => (VPMOVSXWDMasked128 x mask) @@ -1464,15 +1482,23 @@ (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) => (VPMOVSXBQMasked256 x mask) (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask) -(VMOVDQU16Masked128 (VPMOVUSWB128 x) mask) => (VPMOVUSWBMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => 
(VPMOVUSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask) (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) => (VPMOVZXBWMasked256 x mask) (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVUSDW128 x) mask) => (VPMOVUSDWMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) => (VPMOVUSQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) => (VPMOVUSQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512 x mask) (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask) (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask) @@ -1483,7 +1509,8 @@ (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask) (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) => (VPMOVZXWDMasked256 x mask) (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask) -(VMOVDQU64Masked128 (VPMOVUSQD128 x) mask) => (VPMOVUSQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) => (VPMOVUSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) => (VPMOVUSQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256 x mask) (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) => (VPMOVZXBDMasked128 x mask) (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) => (VPMOVZXWDMasked128 x 
mask) @@ -1862,424 +1889,451 @@ (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask) (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask) (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask) -(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask) -(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) -(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask) -(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) 
-(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask) -(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVUSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask) +(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask) 
+(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask) (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) => (VPOPCNTBMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPLZCNTD512 x) 
mask) => (VPLZCNTDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask) -(VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPOPCNTD512 x) 
mask) => (VPOPCNTDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask) +(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask) +(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) => (VRCP14PSMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask) -(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512Merging dst [a] x mask) -(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => 
(VPABSQMasked512Merging dst x mask) (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) => (VPADDQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => 
(VPROLQMasked512Merging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask) +(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask) +(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask) (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDSW256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask) +(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM 
<types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask) -(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask) (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) 
mask) => (VPSHRDWMasked512Merging dst [a] x y mask) +(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) (VPBLENDVB128 dst (VADDPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VREDUCEPD128 [a] x) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) -(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VADDPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBD256 x) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask) -(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x 
(VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask) -(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) 
-(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> 
mask)) +(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPABSW128 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPADDB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask) -(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPADDQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPADDUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked128Merging dst x y 
(VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPRORD128 [a] x) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask) -(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask) -(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask) -(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) => (VPMOVQBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPMULLD256 x y) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) -(VPBLENDVB256 dst 
(VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVUSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) 
+(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMOVUSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask) -(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) 
&& v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) +(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x 
y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask) -(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst 
(VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask) +(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst 
(VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> 
mask)) -(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask) (VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) -(VPBLENDMDMasked512 dst 
(VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask) -(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask) -(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> 
mask)) -(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPAVGW128 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask) +(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPOPCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) => (VPMOVDBMasked128Merging dst x mask) -(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask) (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask) -(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) -(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask) -(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask) +(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) 
+(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM 
<types.TypeMask> mask)) (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask) -(VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) => (VPMOVQWMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask) +(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) 
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) 
+(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM 
<types.TypeMask> mask)) +(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VADDPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128Merging dst x mask) -(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM 
<types.TypeMask> mask)) +(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPABSW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPLZCNTQ128 x) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPAVGW256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPMAXUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask) -(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask) -(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask) -(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask) 
-(VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128Merging dst x mask) -(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask) +(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) 
+(VPBLENDVB256 dst (VPMOVQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM 
<types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x 
y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPOPCNTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask) -(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y 
(VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => 
(VSQRTPSMasked512Merging dst x mask) -(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask) -(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB256 dst (VPMOVDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) -(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y 
(VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSUBSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) -(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) -(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask) -(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask) +(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VRCP14PD256 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPABSD512 l:(VMOVDQUload512 {sym} 
[off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem) (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem) (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 4f22d8582b..4e4f4a4205 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -585,37 +585,71 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMOVDB128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVDW128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDB128_128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDB128_256", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDB128_512", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDBMasked128_256", argLength: 2, 
reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDW128_128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDW128_256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVDW256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVQB128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVQD128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQB128_128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQB128_256", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQB128_512", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQBMasked128_256", 
argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQD128_128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQD128_256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVQD256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVQW128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSDB128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSDW128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQW128_128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQW128_256", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQW128_512", 
argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDB128_128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDB128_256", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDB128_512", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDW128_128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDW128_256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSDW256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: 
"Vec128", resultInArg0: false}, {name: "VPMOVSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVSQB128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSQD128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQB128_128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQB128_256", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQB128_512", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQD128_128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQD128_256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSQD256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQDMasked128_256", argLength: 2, reg: wkw, asm: 
"VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVSQW128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVSWB128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQW128_128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQW128_256", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQW128_512", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSWB128_128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSWB128_256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSWB256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: 
"VPMOVSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -653,27 +687,47 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPMOVUSDB128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSDW128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", 
resultInArg0: false}, + {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSQB128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQD128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_256", 
argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSQW128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMOVUSWB128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQW128_128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQW128_256", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQW128_512", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, 
typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVUSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVWB128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVWB256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMOVWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMOVWBMasked128_256", argLength: 2, 
reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -2064,21 +2118,38 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMINUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMINUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPMOVDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: 
"Vec256", resultInArg0: true}, - {name: "VPMOVQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: 
"VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: 
"VPMOVSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVSXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: true}, @@ -2098,16 +2169,26 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPMOVUSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", 
resultInArg0: true}, + {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVUSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: 
"VPMOVUSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPMOVUSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPMOVWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPMOVZXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPMOVZXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go 
b/src/cmd/compile/internal/ssa/opGen.go index 4dd7faeebf..1d3875a9be 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1826,37 +1826,71 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMINUWMasked256 OpAMD64VPMINUWMasked512 - OpAMD64VPMOVDB128 - OpAMD64VPMOVDBMasked128 - OpAMD64VPMOVDW128 + OpAMD64VPMOVDB128_128 + OpAMD64VPMOVDB128_256 + OpAMD64VPMOVDB128_512 + OpAMD64VPMOVDBMasked128_128 + OpAMD64VPMOVDBMasked128_256 + OpAMD64VPMOVDBMasked128_512 + OpAMD64VPMOVDW128_128 + OpAMD64VPMOVDW128_256 OpAMD64VPMOVDW256 - OpAMD64VPMOVDWMasked128 + OpAMD64VPMOVDWMasked128_128 + OpAMD64VPMOVDWMasked128_256 OpAMD64VPMOVDWMasked256 - OpAMD64VPMOVQB128 - OpAMD64VPMOVQBMasked128 - OpAMD64VPMOVQD128 + OpAMD64VPMOVQB128_128 + OpAMD64VPMOVQB128_256 + OpAMD64VPMOVQB128_512 + OpAMD64VPMOVQBMasked128_128 + OpAMD64VPMOVQBMasked128_256 + OpAMD64VPMOVQBMasked128_512 + OpAMD64VPMOVQD128_128 + OpAMD64VPMOVQD128_256 OpAMD64VPMOVQD256 - OpAMD64VPMOVQDMasked128 + OpAMD64VPMOVQDMasked128_128 + OpAMD64VPMOVQDMasked128_256 OpAMD64VPMOVQDMasked256 - OpAMD64VPMOVQW128 - OpAMD64VPMOVQWMasked128 - OpAMD64VPMOVSDB128 - OpAMD64VPMOVSDBMasked128 - OpAMD64VPMOVSDW128 + OpAMD64VPMOVQW128_128 + OpAMD64VPMOVQW128_256 + OpAMD64VPMOVQW128_512 + OpAMD64VPMOVQWMasked128_128 + OpAMD64VPMOVQWMasked128_256 + OpAMD64VPMOVQWMasked128_512 + OpAMD64VPMOVSDB128_128 + OpAMD64VPMOVSDB128_256 + OpAMD64VPMOVSDB128_512 + OpAMD64VPMOVSDBMasked128_128 + OpAMD64VPMOVSDBMasked128_256 + OpAMD64VPMOVSDBMasked128_512 + OpAMD64VPMOVSDW128_128 + OpAMD64VPMOVSDW128_256 OpAMD64VPMOVSDW256 - OpAMD64VPMOVSDWMasked128 + OpAMD64VPMOVSDWMasked128_128 + OpAMD64VPMOVSDWMasked128_256 OpAMD64VPMOVSDWMasked256 - OpAMD64VPMOVSQB128 - OpAMD64VPMOVSQBMasked128 - OpAMD64VPMOVSQD128 + OpAMD64VPMOVSQB128_128 + OpAMD64VPMOVSQB128_256 + OpAMD64VPMOVSQB128_512 + OpAMD64VPMOVSQBMasked128_128 + OpAMD64VPMOVSQBMasked128_256 + OpAMD64VPMOVSQBMasked128_512 + OpAMD64VPMOVSQD128_128 + OpAMD64VPMOVSQD128_256 
OpAMD64VPMOVSQD256 - OpAMD64VPMOVSQDMasked128 + OpAMD64VPMOVSQDMasked128_128 + OpAMD64VPMOVSQDMasked128_256 OpAMD64VPMOVSQDMasked256 - OpAMD64VPMOVSQW128 - OpAMD64VPMOVSQWMasked128 - OpAMD64VPMOVSWB128 + OpAMD64VPMOVSQW128_128 + OpAMD64VPMOVSQW128_256 + OpAMD64VPMOVSQW128_512 + OpAMD64VPMOVSQWMasked128_128 + OpAMD64VPMOVSQWMasked128_256 + OpAMD64VPMOVSQWMasked128_512 + OpAMD64VPMOVSWB128_128 + OpAMD64VPMOVSWB128_256 OpAMD64VPMOVSWB256 - OpAMD64VPMOVSWBMasked128 + OpAMD64VPMOVSWBMasked128_128 + OpAMD64VPMOVSWBMasked128_256 OpAMD64VPMOVSWBMasked256 OpAMD64VPMOVSXBD128 OpAMD64VPMOVSXBD256 @@ -1894,27 +1928,47 @@ const ( OpAMD64VPMOVSXWQMasked128 OpAMD64VPMOVSXWQMasked256 OpAMD64VPMOVSXWQMasked512 - OpAMD64VPMOVUSDB128 - OpAMD64VPMOVUSDBMasked128 - OpAMD64VPMOVUSDW128 + OpAMD64VPMOVUSDB128_128 + OpAMD64VPMOVUSDB128_256 + OpAMD64VPMOVUSDB128_512 + OpAMD64VPMOVUSDBMasked128_128 + OpAMD64VPMOVUSDBMasked128_256 + OpAMD64VPMOVUSDBMasked128_512 + OpAMD64VPMOVUSDW128_128 + OpAMD64VPMOVUSDW128_256 OpAMD64VPMOVUSDW256 - OpAMD64VPMOVUSDWMasked128 + OpAMD64VPMOVUSDWMasked128_128 + OpAMD64VPMOVUSDWMasked128_256 OpAMD64VPMOVUSDWMasked256 - OpAMD64VPMOVUSQB128 - OpAMD64VPMOVUSQBMasked128 - OpAMD64VPMOVUSQD128 + OpAMD64VPMOVUSQB128_128 + OpAMD64VPMOVUSQB128_256 + OpAMD64VPMOVUSQB128_512 + OpAMD64VPMOVUSQBMasked128_128 + OpAMD64VPMOVUSQBMasked128_256 + OpAMD64VPMOVUSQBMasked128_512 + OpAMD64VPMOVUSQD128_128 + OpAMD64VPMOVUSQD128_256 OpAMD64VPMOVUSQD256 - OpAMD64VPMOVUSQDMasked128 + OpAMD64VPMOVUSQDMasked128_128 + OpAMD64VPMOVUSQDMasked128_256 OpAMD64VPMOVUSQDMasked256 - OpAMD64VPMOVUSQW128 - OpAMD64VPMOVUSQWMasked128 - OpAMD64VPMOVUSWB128 + OpAMD64VPMOVUSQW128_128 + OpAMD64VPMOVUSQW128_256 + OpAMD64VPMOVUSQW128_512 + OpAMD64VPMOVUSQWMasked128_128 + OpAMD64VPMOVUSQWMasked128_256 + OpAMD64VPMOVUSQWMasked128_512 + OpAMD64VPMOVUSWB128_128 + OpAMD64VPMOVUSWB128_256 OpAMD64VPMOVUSWB256 - OpAMD64VPMOVUSWBMasked128 + OpAMD64VPMOVUSWBMasked128_128 + OpAMD64VPMOVUSWBMasked128_256 
OpAMD64VPMOVUSWBMasked256 - OpAMD64VPMOVWB128 + OpAMD64VPMOVWB128_128 + OpAMD64VPMOVWB128_256 OpAMD64VPMOVWB256 - OpAMD64VPMOVWBMasked128 + OpAMD64VPMOVWBMasked128_128 + OpAMD64VPMOVWBMasked128_256 OpAMD64VPMOVWBMasked256 OpAMD64VPMOVZXBD128 OpAMD64VPMOVZXBD256 @@ -3305,21 +3359,38 @@ const ( OpAMD64VPMINUWMasked128Merging OpAMD64VPMINUWMasked256Merging OpAMD64VPMINUWMasked512Merging - OpAMD64VPMOVDBMasked128Merging - OpAMD64VPMOVDWMasked128Merging + OpAMD64VPMOVDBMasked128_128Merging + OpAMD64VPMOVDBMasked128_256Merging + OpAMD64VPMOVDBMasked128_512Merging + OpAMD64VPMOVDWMasked128_128Merging + OpAMD64VPMOVDWMasked128_256Merging OpAMD64VPMOVDWMasked256Merging - OpAMD64VPMOVQBMasked128Merging - OpAMD64VPMOVQDMasked128Merging + OpAMD64VPMOVQBMasked128_128Merging + OpAMD64VPMOVQBMasked128_256Merging + OpAMD64VPMOVQBMasked128_512Merging + OpAMD64VPMOVQDMasked128_128Merging + OpAMD64VPMOVQDMasked128_256Merging OpAMD64VPMOVQDMasked256Merging - OpAMD64VPMOVQWMasked128Merging - OpAMD64VPMOVSDBMasked128Merging - OpAMD64VPMOVSDWMasked128Merging + OpAMD64VPMOVQWMasked128_128Merging + OpAMD64VPMOVQWMasked128_256Merging + OpAMD64VPMOVQWMasked128_512Merging + OpAMD64VPMOVSDBMasked128_128Merging + OpAMD64VPMOVSDBMasked128_256Merging + OpAMD64VPMOVSDBMasked128_512Merging + OpAMD64VPMOVSDWMasked128_128Merging + OpAMD64VPMOVSDWMasked128_256Merging OpAMD64VPMOVSDWMasked256Merging - OpAMD64VPMOVSQBMasked128Merging - OpAMD64VPMOVSQDMasked128Merging + OpAMD64VPMOVSQBMasked128_128Merging + OpAMD64VPMOVSQBMasked128_256Merging + OpAMD64VPMOVSQBMasked128_512Merging + OpAMD64VPMOVSQDMasked128_128Merging + OpAMD64VPMOVSQDMasked128_256Merging OpAMD64VPMOVSQDMasked256Merging - OpAMD64VPMOVSQWMasked128Merging - OpAMD64VPMOVSWBMasked128Merging + OpAMD64VPMOVSQWMasked128_128Merging + OpAMD64VPMOVSQWMasked128_256Merging + OpAMD64VPMOVSQWMasked128_512Merging + OpAMD64VPMOVSWBMasked128_128Merging + OpAMD64VPMOVSWBMasked128_256Merging OpAMD64VPMOVSWBMasked256Merging OpAMD64VPMOVSXBDMasked128Merging 
OpAMD64VPMOVSXBDMasked256Merging @@ -3339,16 +3410,26 @@ const ( OpAMD64VPMOVSXWQMasked128Merging OpAMD64VPMOVSXWQMasked256Merging OpAMD64VPMOVSXWQMasked512Merging - OpAMD64VPMOVUSDBMasked128Merging - OpAMD64VPMOVUSDWMasked128Merging + OpAMD64VPMOVUSDBMasked128_128Merging + OpAMD64VPMOVUSDBMasked128_256Merging + OpAMD64VPMOVUSDBMasked128_512Merging + OpAMD64VPMOVUSDWMasked128_128Merging + OpAMD64VPMOVUSDWMasked128_256Merging OpAMD64VPMOVUSDWMasked256Merging - OpAMD64VPMOVUSQBMasked128Merging - OpAMD64VPMOVUSQDMasked128Merging + OpAMD64VPMOVUSQBMasked128_128Merging + OpAMD64VPMOVUSQBMasked128_256Merging + OpAMD64VPMOVUSQBMasked128_512Merging + OpAMD64VPMOVUSQDMasked128_128Merging + OpAMD64VPMOVUSQDMasked128_256Merging OpAMD64VPMOVUSQDMasked256Merging - OpAMD64VPMOVUSQWMasked128Merging - OpAMD64VPMOVUSWBMasked128Merging + OpAMD64VPMOVUSQWMasked128_128Merging + OpAMD64VPMOVUSQWMasked128_256Merging + OpAMD64VPMOVUSQWMasked128_512Merging + OpAMD64VPMOVUSWBMasked128_128Merging + OpAMD64VPMOVUSWBMasked128_256Merging OpAMD64VPMOVUSWBMasked256Merging - OpAMD64VPMOVWBMasked128Merging + OpAMD64VPMOVWBMasked128_128Merging + OpAMD64VPMOVWBMasked128_256Merging OpAMD64VPMOVWBMasked256Merging OpAMD64VPMOVZXBDMasked128Merging OpAMD64VPMOVZXBDMasked256Merging @@ -29124,7 +29205,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDB128", + name: "VPMOVDB128_128", argLen: 1, asm: x86.AVPMOVDB, reg: regInfo{ @@ -29137,7 +29218,47 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDBMasked128", + name: "VPMOVDB128_256", + argLen: 1, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDB128_512", + argLen: 1, + asm: x86.AVPMOVDB, 
+ reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDBMasked128_256", argLen: 2, asm: x86.AVPMOVDB, reg: regInfo{ @@ -29151,7 +29272,34 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDW128", + name: "VPMOVDBMasked128_512", + argLen: 2, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDW128_128", + argLen: 1, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDW128_256", argLen: 1, asm: x86.AVPMOVDW, 
reg: regInfo{ @@ -29177,7 +29325,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDWMasked128", + name: "VPMOVDWMasked128_128", + argLen: 2, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128_256", argLen: 2, asm: x86.AVPMOVDW, reg: regInfo{ @@ -29205,7 +29367,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQB128", + name: "VPMOVQB128_128", + argLen: 1, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQB128_256", argLen: 1, asm: x86.AVPMOVQB, reg: regInfo{ @@ -29218,7 +29393,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQBMasked128", + name: "VPMOVQB128_512", + argLen: 1, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_128", argLen: 2, asm: x86.AVPMOVQB, reg: regInfo{ @@ -29232,7 +29420,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQD128", + name: "VPMOVQBMasked128_256", + argLen: 2, + asm: 
x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_512", + argLen: 2, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQD128_128", + argLen: 1, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQD128_256", argLen: 1, asm: x86.AVPMOVQD, reg: regInfo{ @@ -29258,7 +29487,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQDMasked128", + name: "VPMOVQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + 
}, + }, + { + name: "VPMOVQDMasked128_256", argLen: 2, asm: x86.AVPMOVQD, reg: regInfo{ @@ -29286,7 +29529,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQW128", + name: "VPMOVQW128_128", + argLen: 1, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQW128_256", + argLen: 1, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQW128_512", argLen: 1, asm: x86.AVPMOVQW, reg: regInfo{ @@ -29299,7 +29568,35 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQWMasked128", + name: "VPMOVQWMasked128_128", + argLen: 2, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_256", + argLen: 2, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + 
outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_512", argLen: 2, asm: x86.AVPMOVQW, reg: regInfo{ @@ -29313,7 +29610,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDB128", + name: "VPMOVSDB128_128", argLen: 1, asm: x86.AVPMOVSDB, reg: regInfo{ @@ -29326,7 +29623,47 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDBMasked128", + name: "VPMOVSDB128_256", + argLen: 1, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDB128_512", + argLen: 1, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_256", argLen: 2, asm: x86.AVPMOVSDB, reg: regInfo{ @@ -29340,7 +29677,34 @@ var opcodeTable = [...]opInfo{ 
}, }, { - name: "VPMOVSDW128", + name: "VPMOVSDBMasked128_512", + argLen: 2, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDW128_128", + argLen: 1, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDW128_256", argLen: 1, asm: x86.AVPMOVSDW, reg: regInfo{ @@ -29366,7 +29730,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDWMasked128", + name: "VPMOVSDWMasked128_128", + argLen: 2, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_256", argLen: 2, asm: x86.AVPMOVSDW, reg: regInfo{ @@ -29394,7 +29772,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQB128", + name: "VPMOVSQB128_128", + argLen: 1, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 
X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQB128_256", argLen: 1, asm: x86.AVPMOVSQB, reg: regInfo{ @@ -29407,7 +29798,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQBMasked128", + name: "VPMOVSQB128_512", + argLen: 1, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_512", argLen: 2, asm: x86.AVPMOVSQB, reg: regInfo{ @@ -29421,7 +29853,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQD128", + name: "VPMOVSQD128_128", + argLen: 1, + 
asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQD128_256", argLen: 1, asm: x86.AVPMOVSQD, reg: regInfo{ @@ -29447,7 +29892,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQDMasked128", + name: "VPMOVSQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQDMasked128_256", argLen: 2, asm: x86.AVPMOVSQD, reg: regInfo{ @@ -29475,7 +29934,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQW128", + name: "VPMOVSQW128_128", + argLen: 1, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQW128_256", argLen: 1, asm: x86.AVPMOVSQW, reg: regInfo{ @@ -29488,7 +29960,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQWMasked128", + name: "VPMOVSQW128_512", + argLen: 1, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 
X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_128", argLen: 2, asm: x86.AVPMOVSQW, reg: regInfo{ @@ -29502,7 +29987,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWB128", + name: "VPMOVSQWMasked128_256", + argLen: 2, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_512", + argLen: 2, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWB128_128", + argLen: 1, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWB128_256", argLen: 1, asm: x86.AVPMOVSWB, reg: regInfo{ @@ -29528,7 +30054,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWBMasked128", + name: 
"VPMOVSWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_256", argLen: 2, asm: x86.AVPMOVSWB, reg: regInfo{ @@ -30042,7 +30582,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDB128", + name: "VPMOVUSDB128_128", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_256", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_512", argLen: 1, asm: x86.AVPMOVUSDB, reg: regInfo{ @@ -30055,7 +30621,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDBMasked128", + name: "VPMOVUSDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + 
outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256", argLen: 2, asm: x86.AVPMOVUSDB, reg: regInfo{ @@ -30069,7 +30649,34 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDW128", + name: "VPMOVUSDBMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDW128_128", + argLen: 1, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDW128_256", argLen: 1, asm: x86.AVPMOVUSDW, reg: regInfo{ @@ -30095,7 +30702,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDWMasked128", + name: "VPMOVUSDWMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDWMasked128_256", argLen: 2, asm: 
x86.AVPMOVUSDW, reg: regInfo{ @@ -30123,7 +30744,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQB128", + name: "VPMOVUSQB128_128", argLen: 1, asm: x86.AVPMOVUSQB, reg: regInfo{ @@ -30136,7 +30757,61 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQBMasked128", + name: "VPMOVUSQB128_256", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQB128_512", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512", argLen: 2, asm: x86.AVPMOVUSQB, reg: regInfo{ @@ -30150,7 +30825,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQD128", + name: "VPMOVUSQD128_128", + argLen: 1, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQD128_256", argLen: 1, asm: x86.AVPMOVUSQD, reg: regInfo{ @@ -30176,7 +30864,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQDMasked128", + name: "VPMOVUSQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQDMasked128_256", argLen: 2, asm: x86.AVPMOVUSQD, reg: regInfo{ @@ -30204,7 +30906,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQW128", + name: "VPMOVUSQW128_128", + argLen: 1, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 
X31 + }, + }, + }, + { + name: "VPMOVUSQW128_256", + argLen: 1, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQW128_512", argLen: 1, asm: x86.AVPMOVUSQW, reg: regInfo{ @@ -30217,7 +30945,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQWMasked128", + name: "VPMOVUSQWMasked128_128", argLen: 2, asm: x86.AVPMOVUSQW, reg: regInfo{ @@ -30231,7 +30959,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWB128", + name: "VPMOVUSQWMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQWMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWB128_128", + argLen: 1, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 
X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWB128_256", argLen: 1, asm: x86.AVPMOVUSWB, reg: regInfo{ @@ -30257,7 +31026,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWBMasked128", + name: "VPMOVUSWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256", argLen: 2, asm: x86.AVPMOVUSWB, reg: regInfo{ @@ -30285,7 +31068,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWB128", + name: "VPMOVWB128_128", + argLen: 1, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWB128_256", argLen: 1, asm: x86.AVPMOVWB, reg: regInfo{ @@ -30311,7 +31107,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWBMasked128", + name: "VPMOVWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 
X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWBMasked128_256", argLen: 2, asm: x86.AVPMOVWB, reg: regInfo{ @@ -52247,7 +53057,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDBMasked128Merging", + name: "VPMOVDBMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVDB, @@ -52263,7 +53073,55 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDWMasked128Merging", + name: "VPMOVDBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 
281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVDW, @@ -52295,7 +53153,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQBMasked128Merging", + name: "VPMOVQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQB, @@ -52311,7 +53185,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQDMasked128Merging", + name: "VPMOVQBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: 
[]outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQD, @@ -52343,7 +53249,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQWMasked128Merging", + name: "VPMOVQWMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQW, @@ -52359,7 +53265,55 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDBMasked128Merging", + name: "VPMOVQWMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: 
[]inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSDB, @@ -52375,7 +53329,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDWMasked128Merging", + name: "VPMOVSDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 
X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSDW, @@ -52407,7 +53393,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQBMasked128Merging", + name: "VPMOVSQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 
X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_512Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSQB, @@ -52423,7 +53441,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQDMasked128Merging", + name: "VPMOVSQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSQD, @@ -52455,7 +53489,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQWMasked128Merging", + name: "VPMOVSQWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_256Merging", argLen: 3, 
resultInArg0: true, asm: x86.AVPMOVSQW, @@ -52471,7 +53521,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWBMasked128Merging", + name: "VPMOVSQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSWB, @@ -52791,7 +53873,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDBMasked128Merging", + name: "VPMOVUSDBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 
281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSDB, @@ -52807,7 +53905,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDWMasked128Merging", + name: "VPMOVUSDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSDW, @@ -52839,7 +53969,39 @@ var opcodeTable = [...]opInfo{ 
}, }, { - name: "VPMOVUSQBMasked128Merging", + name: "VPMOVUSQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQB, @@ -52855,7 +54017,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQDMasked128Merging", + name: "VPMOVUSQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 
X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQD, @@ -52887,7 +54065,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQWMasked128Merging", + name: "VPMOVUSQWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQW, @@ -52903,7 +54097,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWBMasked128Merging", + name: "VPMOVUSQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_128Merging", + 
argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSWB, @@ -52935,7 +54161,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWBMasked128Merging", + name: "VPMOVWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVWB, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index bf0e79de0b..974af9d842 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2574,19 +2574,19 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVDW256 return true case OpConvertToInt16Int32x4: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_128 
return true case OpConvertToInt16Int32x8: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_256 return true case OpConvertToInt16Int64x2: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_128 return true case OpConvertToInt16Int64x4: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_256 return true case OpConvertToInt16Int64x8: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_512 return true case OpConvertToInt16Int8x16: v.Op = OpAMD64VPMOVSXBW256 @@ -2598,19 +2598,19 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSDW256 return true case OpConvertToInt16SaturatedInt32x4: - v.Op = OpAMD64VPMOVSDW128 + v.Op = OpAMD64VPMOVSDW128_128 return true case OpConvertToInt16SaturatedInt32x8: - v.Op = OpAMD64VPMOVSDW128 + v.Op = OpAMD64VPMOVSDW128_256 return true case OpConvertToInt16SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_128 return true case OpConvertToInt16SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_256 return true case OpConvertToInt16SaturatedInt64x8: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_512 return true case OpConvertToInt16SaturatedPackedInt32x16: v.Op = OpAMD64VPACKSSDW512 @@ -2640,10 +2640,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSXWD256 return true case OpConvertToInt32Int64x2: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_128 return true case OpConvertToInt32Int64x4: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_256 return true case OpConvertToInt32Int64x8: v.Op = OpAMD64VPMOVQD256 @@ -2652,10 +2652,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSXBD512 return true case OpConvertToInt32SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQD128 + v.Op = OpAMD64VPMOVSQD128_128 return true case OpConvertToInt32SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQD128 + v.Op = OpAMD64VPMOVSQD128_256 return true case OpConvertToInt32SaturatedInt64x8: v.Op = OpAMD64VPMOVSQD256 @@ -2694,58 +2694,58 @@ func rewriteValueAMD64(v 
*Value) bool { v.Op = OpAMD64VPMOVSXBQ512 return true case OpConvertToInt8Int16x16: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_256 return true case OpConvertToInt8Int16x32: v.Op = OpAMD64VPMOVWB256 return true case OpConvertToInt8Int16x8: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_128 return true case OpConvertToInt8Int32x16: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_512 return true case OpConvertToInt8Int32x4: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_128 return true case OpConvertToInt8Int32x8: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_256 return true case OpConvertToInt8Int64x2: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_128 return true case OpConvertToInt8Int64x4: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_256 return true case OpConvertToInt8Int64x8: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_512 return true case OpConvertToInt8SaturatedInt16x16: - v.Op = OpAMD64VPMOVSWB128 + v.Op = OpAMD64VPMOVSWB128_256 return true case OpConvertToInt8SaturatedInt16x32: v.Op = OpAMD64VPMOVSWB256 return true case OpConvertToInt8SaturatedInt16x8: - v.Op = OpAMD64VPMOVSWB128 + v.Op = OpAMD64VPMOVSWB128_128 return true case OpConvertToInt8SaturatedInt32x16: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_512 return true case OpConvertToInt8SaturatedInt32x4: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_128 return true case OpConvertToInt8SaturatedInt32x8: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_256 return true case OpConvertToInt8SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_128 return true case OpConvertToInt8SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_256 return true case OpConvertToInt8SaturatedInt64x8: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_512 return true case OpConvertToUint16SaturatedPackedUint32x16: v.Op = OpAMD64VPACKUSDW512 @@ -2760,37 +2760,37 @@ func rewriteValueAMD64(v 
*Value) bool { v.Op = OpAMD64VPMOVUSDW256 return true case OpConvertToUint16SaturatedUint32x4: - v.Op = OpAMD64VPMOVUSDW128 + v.Op = OpAMD64VPMOVUSDW128_128 return true case OpConvertToUint16SaturatedUint32x8: - v.Op = OpAMD64VPMOVUSDW128 + v.Op = OpAMD64VPMOVUSDW128_256 return true case OpConvertToUint16SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_128 return true case OpConvertToUint16SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_256 return true case OpConvertToUint16SaturatedUint64x8: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_512 return true case OpConvertToUint16Uint32x16: v.Op = OpAMD64VPMOVDW256 return true case OpConvertToUint16Uint32x4: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_128 return true case OpConvertToUint16Uint32x8: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_256 return true case OpConvertToUint16Uint64x2: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_128 return true case OpConvertToUint16Uint64x4: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_256 return true case OpConvertToUint16Uint64x8: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_512 return true case OpConvertToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW256 @@ -2811,10 +2811,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VCVTPS2UDQ256 return true case OpConvertToUint32SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQD128 + v.Op = OpAMD64VPMOVUSQD128_128 return true case OpConvertToUint32SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQD128 + v.Op = OpAMD64VPMOVUSQD128_256 return true case OpConvertToUint32SaturatedUint64x8: v.Op = OpAMD64VPMOVUSQD256 @@ -2826,10 +2826,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVZXWD256 return true case OpConvertToUint32Uint64x2: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_128 return true case OpConvertToUint32Uint64x4: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_256 return true case 
OpConvertToUint32Uint64x8: v.Op = OpAMD64VPMOVQD256 @@ -2877,58 +2877,58 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVZXBQ512 return true case OpConvertToUint8SaturatedUint16x16: - v.Op = OpAMD64VPMOVUSWB128 + v.Op = OpAMD64VPMOVUSWB128_256 return true case OpConvertToUint8SaturatedUint16x32: v.Op = OpAMD64VPMOVUSWB256 return true case OpConvertToUint8SaturatedUint16x8: - v.Op = OpAMD64VPMOVUSWB128 + v.Op = OpAMD64VPMOVUSWB128_128 return true case OpConvertToUint8SaturatedUint32x16: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_512 return true case OpConvertToUint8SaturatedUint32x4: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_128 return true case OpConvertToUint8SaturatedUint32x8: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_256 return true case OpConvertToUint8SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_128 return true case OpConvertToUint8SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_256 return true case OpConvertToUint8SaturatedUint64x8: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_512 return true case OpConvertToUint8Uint16x16: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_256 return true case OpConvertToUint8Uint16x32: v.Op = OpAMD64VPMOVWB256 return true case OpConvertToUint8Uint16x8: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_128 return true case OpConvertToUint8Uint32x16: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_512 return true case OpConvertToUint8Uint32x4: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_128 return true case OpConvertToUint8Uint32x8: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_256 return true case OpConvertToUint8Uint64x2: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_128 return true case OpConvertToUint8Uint64x4: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_256 return true case OpConvertToUint8Uint64x8: - v.Op = OpAMD64VPMOVQB128 + v.Op = 
OpAMD64VPMOVQB128_512 return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 @@ -31243,27 +31243,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVWB128 x) mask) - // result: (VPMOVWBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) + // result: (VPMOVWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVWB128 { + if v_0.Op != OpAMD64VPMOVWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVWBMasked128) + v.reset(OpAMD64VPMOVWBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVSWB128 x) mask) - // result: (VPMOVSWBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) + // result: (VPMOVSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSWB128 { + if v_0.Op != OpAMD64VPMOVSWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked128) + v.reset(OpAMD64VPMOVSWBMasked128_128) v.AddArg2(x, mask) return true } @@ -31291,15 +31291,15 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVUSWB128 x) mask) - // result: (VPMOVUSWBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) + // result: (VPMOVUSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSWB128 { + if v_0.Op != OpAMD64VPMOVUSWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked128) + v.reset(OpAMD64VPMOVUSWBMasked128_128) v.AddArg2(x, mask) return true } @@ -31781,6 +31781,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) + // result: (VPMOVWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked128_256) + v.AddArg2(x, mask) + return true + } 
// match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) // result: (VPMOVWBMasked256 x mask) for { @@ -31793,6 +31805,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) + // result: (VPMOVSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) // result: (VPMOVSWBMasked256 x mask) for { @@ -31817,6 +31841,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) + // result: (VPMOVUSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) // result: (VPMOVUSWBMasked256 x mask) for { @@ -32863,51 +32899,51 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVDB128 x) mask) - // result: (VPMOVDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) + // result: (VPMOVDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVDB128 { + if v_0.Op != OpAMD64VPMOVDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128) + v.reset(OpAMD64VPMOVDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVSDB128 x) mask) - // result: (VPMOVSDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) + // result: (VPMOVSDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSDB128 { + if v_0.Op != OpAMD64VPMOVSDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128) + 
v.reset(OpAMD64VPMOVSDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVDW128 x) mask) - // result: (VPMOVDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) + // result: (VPMOVDWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVDW128 { + if v_0.Op != OpAMD64VPMOVDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128) + v.reset(OpAMD64VPMOVDWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVSDW128 x) mask) - // result: (VPMOVSDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) + // result: (VPMOVSDWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSDW128 { + if v_0.Op != OpAMD64VPMOVSDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128) + v.reset(OpAMD64VPMOVSDWMasked128_128) v.AddArg2(x, mask) return true } @@ -32948,27 +32984,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDB128 x) mask) - // result: (VPMOVUSDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) + // result: (VPMOVUSDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSDB128 { + if v_0.Op != OpAMD64VPMOVUSDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128) + v.reset(OpAMD64VPMOVUSDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDW128 x) mask) - // result: (VPMOVUSDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) + // result: (VPMOVUSDWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSDW128 { + if v_0.Op != OpAMD64VPMOVUSDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128) + v.reset(OpAMD64VPMOVUSDWMasked128_128) v.AddArg2(x, mask) return true } @@ -33626,6 +33662,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { 
v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) + // result: (VPMOVDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) + // result: (VPMOVSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) + // result: (VPMOVDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) // result: (VPMOVDWMasked256 x mask) for { @@ -33638,6 +33710,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) + // result: (VPMOVSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) // result: (VPMOVSDWMasked256 x mask) for { @@ -33687,6 +33771,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) + // result: (VPMOVUSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) + // result: (VPMOVUSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDW128_256 { + break + 
} + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) // result: (VPMOVUSDWMasked256 x mask) for { @@ -34405,6 +34513,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) // result: (VPACKSSDWMasked512 x y mask) for { @@ -34442,6 +34574,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) // result: (VPACKUSDWMasked512 x y mask) for { @@ -35132,111 +35276,111 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQB128 x) mask) - // result: (VPMOVQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) + // result: (VPMOVQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQB128 { + if v_0.Op != OpAMD64VPMOVQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128) + v.reset(OpAMD64VPMOVQBMasked128_128) 
v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQB128 x) mask) - // result: (VPMOVSQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) + // result: (VPMOVSQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQB128 { + if v_0.Op != OpAMD64VPMOVSQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128) + v.reset(OpAMD64VPMOVSQBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQW128 x) mask) - // result: (VPMOVQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) + // result: (VPMOVQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQW128 { + if v_0.Op != OpAMD64VPMOVQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128) + v.reset(OpAMD64VPMOVQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQW128 x) mask) - // result: (VPMOVSQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) + // result: (VPMOVSQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQW128 { + if v_0.Op != OpAMD64VPMOVSQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128) + v.reset(OpAMD64VPMOVSQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQD128 x) mask) - // result: (VPMOVQDMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) + // result: (VPMOVQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQD128 { + if v_0.Op != OpAMD64VPMOVQD128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128) + v.reset(OpAMD64VPMOVQDMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQD128 x) mask) - // result: (VPMOVSQDMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) + // result: (VPMOVSQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQD128 { + if v_0.Op != OpAMD64VPMOVSQD128_128 { break } 
x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128) + v.reset(OpAMD64VPMOVSQDMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQB128 x) mask) - // result: (VPMOVUSQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) + // result: (VPMOVUSQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQB128 { + if v_0.Op != OpAMD64VPMOVUSQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128) + v.reset(OpAMD64VPMOVUSQBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQW128 x) mask) - // result: (VPMOVUSQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) + // result: (VPMOVUSQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQW128 { + if v_0.Op != OpAMD64VPMOVUSQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128) + v.reset(OpAMD64VPMOVUSQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQD128 x) mask) - // result: (VPMOVUSQDMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) + // result: (VPMOVUSQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQD128 { + if v_0.Op != OpAMD64VPMOVUSQD128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128) + v.reset(OpAMD64VPMOVUSQDMasked128_128) v.AddArg2(x, mask) return true } @@ -35839,6 +35983,66 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) + // result: (VPMOVQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) + // result: (VPMOVSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_256 { + break + } + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) + // result: (VPMOVQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) + // result: (VPMOVSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) + // result: (VPMOVQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask) // result: (VPMOVQDMasked256 x mask) for { @@ -35851,6 +36055,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) + // result: (VPMOVSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) // result: (VPMOVSQDMasked256 x mask) for { @@ -35863,6 +36079,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) + // result: (VPMOVUSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) + // result: 
(VPMOVUSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) + // result: (VPMOVUSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) // result: (VPMOVUSQDMasked256 x mask) for { @@ -36526,6 +36778,78 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPMOVUSQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) // result: (VDIVPDMasked512 x y mask) for { @@ -40279,114 +40603,114 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) - // result: (VPOPCNTBMasked512Merging dst x mask) + // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) + // result: (VPABSBMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB512 { + if v_1.Op != OpAMD64VPABSB512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTBMasked512Merging) + v.reset(OpAMD64VPABSBMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) - // result: (VPSUBSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask) + // result: (VPADDBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSB512 { + if v_1.Op != OpAMD64VPADDB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512Merging) + v.reset(OpAMD64VPADDBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) - // result: (VPSUBBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) + // result: (VPADDSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBB512 { + if v_1.Op != OpAMD64VPADDSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBBMasked512Merging) + v.reset(OpAMD64VPADDSBMasked512Merging) v.AddArg4(dst, x, y, 
mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) - // result: (VPMINSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) + // result: (VPADDUSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSB512 { + if v_1.Op != OpAMD64VPADDUSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSBMasked512Merging) + v.reset(OpAMD64VPADDUSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask) - // result: (VPADDBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) + // result: (VPAVGBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDB512 { + if v_1.Op != OpAMD64VPAVGB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDBMasked512Merging) + v.reset(OpAMD64VPAVGBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) - // result: (VPMAXUBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) + // result: (VPMAXSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUB512 { + if v_1.Op != OpAMD64VPMAXSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512Merging) + v.reset(OpAMD64VPMAXSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) - // result: (VPADDUSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) + // result: (VPMAXUBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSB512 { + if v_1.Op != OpAMD64VPMAXUB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDUSBMasked512Merging) + v.reset(OpAMD64VPMAXUBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: 
(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) - // result: (VPAVGBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) + // result: (VPMINSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGB512 { + if v_1.Op != OpAMD64VPMINSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPAVGBMasked512Merging) + v.reset(OpAMD64VPMINSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -40404,72 +40728,72 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) - // result: (VPMAXSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) + // result: (VPOPCNTBMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSB512 { + if v_1.Op != OpAMD64VPOPCNTB512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPOPCNTBMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) - // result: (VPSUBUSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) + // result: (VPSHUFBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB512 { + if v_1.Op != OpAMD64VPSHUFB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked512Merging) + v.reset(OpAMD64VPSHUFBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) - // result: (VPSHUFBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) + // result: (VPSUBBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFB512 { + if v_1.Op != OpAMD64VPSUBB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - 
v.reset(OpAMD64VPSHUFBMasked512Merging) + v.reset(OpAMD64VPSUBBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) - // result: (VPABSBMasked512Merging dst x mask) + // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) + // result: (VPSUBSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB512 { + if v_1.Op != OpAMD64VPSUBSB512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPABSBMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSUBSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) - // result: (VPADDSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) + // result: (VPSUBUSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSB512 { + if v_1.Op != OpAMD64VPSUBUSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDSBMasked512Merging) + v.reset(OpAMD64VPSUBUSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -40479,547 +40803,547 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) - // result: (VPMOVSDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) + // result: (VADDPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW256 { + if v_1.Op != OpAMD64VADDPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSDWMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VADDPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) - // result: (VPLZCNTDMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) + // result: (VCVTPS2UDQMasked512Merging dst x 
mask) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD512 { + if v_1.Op != OpAMD64VCVTPS2UDQ512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPLZCNTDMasked512Merging) + v.reset(OpAMD64VCVTPS2UDQMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) - // result: (VPMULLDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) + // result: (VCVTTPS2DQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLD512 { + if v_1.Op != OpAMD64VCVTTPS2DQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULLDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VCVTTPS2DQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) - // result: (VMAXPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) + // result: (VDIVPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPS512 { + if v_1.Op != OpAMD64VDIVPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMAXPSMasked512Merging) + v.reset(OpAMD64VDIVPSMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) - // result: (VPMOVUSDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) + // result: (VMAXPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128 { + if v_1.Op != OpAMD64VMAXPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSDBMasked128Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMAXPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) - // result: (VRSQRT14PSMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) + // result: (VMINPSMasked512Merging dst x y 
mask) for { dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PS512 { + if v_1.Op != OpAMD64VMINPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRSQRT14PSMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMINPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) - // result: (VPMOVDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) + // result: (VMULPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDW256 { + if v_1.Op != OpAMD64VMULPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVDWMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMULPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) - // result: (VRCP14PSMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) + // result: (VPABSDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PS512 { + if v_1.Op != OpAMD64VPABSD512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRCP14PSMasked512Merging) + v.reset(OpAMD64VPABSDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) - // result: (VREDUCEPSMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS512 { + if v_1.Op != OpAMD64VPACKSSDW512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VREDUCEPSMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPACKSSDWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) - // result: (VDIVPSMasked512Merging dst x y mask) + // match: 
(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPS512 { + if v_1.Op != OpAMD64VPACKUSDW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VDIVPSMasked512Merging) + v.reset(OpAMD64VPACKUSDWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) - // result: (VPSRLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) + // result: (VPADDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD512 { + if v_1.Op != OpAMD64VPADDD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512Merging) + v.reset(OpAMD64VPADDDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) - // result: (VPSUBDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask) + // result: (VPANDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBD512 { + if v_1.Op != OpAMD64VPANDD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBDMasked512Merging) + v.reset(OpAMD64VPANDDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) - // result: (VPROLDMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) + // result: (VPLZCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD512 { + if v_1.Op != OpAMD64VPLZCNTD512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPLZCNTDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) - // result: (VPORDMasked512Merging dst x y mask) + // match: 
(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) + // result: (VPMAXSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPORD512 { + if v_1.Op != OpAMD64VPMAXSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPORDMasked512Merging) + v.reset(OpAMD64VPMAXSDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) - // result: (VPSHLDDMasked512Merging dst [a] x y mask) + // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) + // result: (VPMAXUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDD512 { + if v_1.Op != OpAMD64VPMAXUD512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMAXUDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) + // result: (VPMINSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW512 { + if v_1.Op != OpAMD64VPMINSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPACKUSDWMasked512Merging) + v.reset(OpAMD64VPMINSDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) - // result: (VPMAXSDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) + // result: (VPMINUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSD512 { + if v_1.Op != OpAMD64VPMINUD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSDMasked512Merging) + v.reset(OpAMD64VPMINUDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) - // result: (VADDPSMasked512Merging 
dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VADDPS512 { + if v_1.Op != OpAMD64VPMOVDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VADDPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) - // result: (VPMOVUSDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) + // result: (VPMOVDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW256 { + if v_1.Op != OpAMD64VPMOVDW256 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSDWMasked256Merging) + v.reset(OpAMD64VPMOVDWMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) - // result: (VPMOVSDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSDB128 { + if v_1.Op != OpAMD64VPMOVSDB128_512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSDBMasked128Merging) + v.reset(OpAMD64VPMOVSDBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) - // result: (VSUBPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) + // result: (VPMOVSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS512 { + if v_1.Op != OpAMD64VPMOVSDW256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSUBPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSDWMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) - // result: (VPMAXUDMasked512Merging dst x 
y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUD512 { + if v_1.Op != OpAMD64VPMOVUSDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) - // result: (VPRORDMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) + // result: (VPMOVUSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORD512 { + if v_1.Op != OpAMD64VPMOVUSDW256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVUSDWMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) - // result: (VPROLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) + // result: (VPMULLDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVD512 { + if v_1.Op != OpAMD64VPMULLD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLVDMasked512Merging) + v.reset(OpAMD64VPMULLDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) - // result: (VCVTTPS2DQMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) + // result: (VPOPCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ512 { + if v_1.Op != OpAMD64VPOPCNTD512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VCVTTPS2DQMasked512Merging) + v.reset(OpAMD64VPOPCNTDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) - // 
result: (VPACKSSDWMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) + // result: (VPORDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW512 { + if v_1.Op != OpAMD64VPORD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPACKSSDWMasked512Merging) + v.reset(OpAMD64VPORDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) - // result: (VPRORVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) + // result: (VPROLDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD512 { + if v_1.Op != OpAMD64VPROLD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPROLDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) - // result: (VPADDDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) + // result: (VPROLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDD512 { + if v_1.Op != OpAMD64VPROLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDDMasked512Merging) + v.reset(OpAMD64VPROLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) - // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) + // result: (VPRORDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS512 { + if v_1.Op != OpAMD64VPRORD512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRNDSCALEPSMasked512Merging) + v.reset(OpAMD64VPRORDMasked512Merging) v.AuxInt = uint8ToAuxInt(a) 
v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) - // result: (VCVTPS2UDQMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) + // result: (VPRORVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VCVTPS2UDQ512 { + if v_1.Op != OpAMD64VPRORVD512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VCVTPS2UDQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPRORVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) - // result: (VPSHRDDMasked512Merging dst [a] x y mask) + // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) + // result: (VPSHLDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDD512 { + if v_1.Op != OpAMD64VPSHLDD512 { break } a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512Merging) + v.reset(OpAMD64VPSHLDDMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) - // result: (VPOPCNTDMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) + // result: (VPSHRDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD512 { + if v_1.Op != OpAMD64VPSHRDD512 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTDMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSHRDDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) - // result: (VPMOVDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) + // result: (VPSHUFDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDB128 { + if 
v_1.Op != OpAMD64VPSHUFD512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVDBMasked128Merging) + v.reset(OpAMD64VPSHUFDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) - // result: (VPSRADMasked512constMerging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) + // result: (VPSLLDMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD512const { + if v_1.Op != OpAMD64VPSLLD512const { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked512constMerging) + v.reset(OpAMD64VPSLLDMasked512constMerging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) - // result: (VMINPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) + // result: (VPSLLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMINPS512 { + if v_1.Op != OpAMD64VPSLLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMINPSMasked512Merging) + v.reset(OpAMD64VPSLLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask) - // result: (VPANDDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) + // result: (VPSRADMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPANDD512 { + if v_1.Op != OpAMD64VPSRAD512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPANDDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSRADMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) - // result: (VPSHUFDMasked512Merging 
dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) + // result: (VPSRAVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFD512 { + if v_1.Op != OpAMD64VPSRAVD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHUFDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSRAVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) - // result: (VPMINSDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) + // result: (VPSRLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSD512 { + if v_1.Op != OpAMD64VPSRLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSDMasked512Merging) + v.reset(OpAMD64VPSRLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) - // result: (VPSRAVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) + // result: (VPSUBDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD512 { + if v_1.Op != OpAMD64VPSUBD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512Merging) + v.reset(OpAMD64VPSUBDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41037,47 +41361,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) - // result: (VPSLLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) + // result: (VRCP14PSMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD512 { + if v_1.Op != OpAMD64VRCP14PS512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512Merging) - 
v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VRCP14PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) - // result: (VPSLLDMasked512constMerging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) + // result: (VREDUCEPSMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD512const { + if v_1.Op != OpAMD64VREDUCEPS512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked512constMerging) + v.reset(OpAMD64VREDUCEPSMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) - // result: (VPMINUDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) + // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUD512 { + if v_1.Op != OpAMD64VRNDSCALEPS512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VRNDSCALEPSMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) + // result: (VRSQRT14PSMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PS512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRSQRT14PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } // match: (VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) @@ -41107,30 +41444,17 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) - // result: (VPABSDMasked512Merging dst x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPABSD512 { - break - } - x := v_1.Args[0] - mask := v_2 - 
v.reset(OpAMD64VPABSDMasked512Merging) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) - // result: (VMULPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) + // result: (VSUBPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMULPS512 { + if v_1.Op != OpAMD64VSUBPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMULPSMasked512Merging) + v.reset(OpAMD64VSUBPSMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41163,104 +41487,74 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) - // result: (VPSLLQMasked512constMerging dst [a] x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLQ512const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) - // result: (VPSUBQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) + // result: (VADDPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ512 { + if v_1.Op != OpAMD64VADDPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBQMasked512Merging) + v.reset(OpAMD64VADDPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) - // result: (VPROLQMasked512Merging dst [a] x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPROLQ512 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) - // 
result: (VPSLLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) + // result: (VDIVPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ512 { + if v_1.Op != OpAMD64VDIVPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512Merging) + v.reset(OpAMD64VDIVPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) - // result: (VPMOVUSQBMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) + // result: (VMAXPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128 { + if v_1.Op != OpAMD64VMAXPD512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSQBMasked128Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMAXPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) - // result: (VPADDQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask) + // result: (VMINPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDQ512 { + if v_1.Op != OpAMD64VMINPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDQMasked512Merging) + v.reset(OpAMD64VMINPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) - // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) + // result: (VMULPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD512 { + if v_1.Op != OpAMD64VMULPD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRNDSCALEPDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMULPDMasked512Merging) + v.AddArg4(dst, x, y, 
mask) return true } // match: (VPBLENDMQMasked512 dst (VPABSQ512 x) mask) @@ -41276,226 +41570,218 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) - // result: (VPMOVUSQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) + // result: (VPADDQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD256 { + if v_1.Op != OpAMD64VPADDQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSQDMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPADDQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) - // result: (VADDPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) + // result: (VPANDQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VADDPD512 { + if v_1.Op != OpAMD64VPANDQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VADDPDMasked512Merging) + v.reset(OpAMD64VPANDQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) - // result: (VRCP14PDMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) + // result: (VPLZCNTQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PD512 { + if v_1.Op != OpAMD64VPLZCNTQ512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRCP14PDMasked512Merging) + v.reset(OpAMD64VPLZCNTQMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) - // result: (VPSRLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) + // result: (VPMAXSQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ512 { + if v_1.Op != OpAMD64VPMAXSQ512 { break } y := 
v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512Merging) + v.reset(OpAMD64VPMAXSQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) - // result: (VPRORVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) + // result: (VPMAXUQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVQ512 { + if v_1.Op != OpAMD64VPMAXUQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORVQMasked512Merging) + v.reset(OpAMD64VPMAXUQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) - // result: (VPSRAVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) + // result: (VPMINSQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ512 { + if v_1.Op != OpAMD64VPMINSQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512Merging) + v.reset(OpAMD64VPMINSQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) - // result: (VPANDQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) + // result: (VPMINUQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPANDQ512 { + if v_1.Op != OpAMD64VPMINUQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPANDQMasked512Merging) + v.reset(OpAMD64VPMINUQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) - // result: (VPMOVQBMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQB128 { + if v_1.Op != OpAMD64VPMOVQB128_512 { break } x := v_1.Args[0] mask := v_2 - 
v.reset(OpAMD64VPMOVQBMasked128Merging) + v.reset(OpAMD64VPMOVQBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) - // result: (VPSHLDQMasked512Merging dst [a] x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) + // result: (VPMOVQDMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ512 { + if v_1.Op != OpAMD64VPMOVQD256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVQDMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) - // result: (VDIVPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPD512 { + if v_1.Op != OpAMD64VPMOVQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VDIVPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) - // result: (VPROLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVQ512 { + if v_1.Op != OpAMD64VPMOVSQB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSQBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) - // result: (VPRORQMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) + // result: (VPMOVSQDMasked256Merging dst x mask) for { 
dst := v_0 - if v_1.Op != OpAMD64VPRORQ512 { + if v_1.Op != OpAMD64VPMOVSQD256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVSQDMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) - // result: (VPMINSQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSQ512 { + if v_1.Op != OpAMD64VPMOVSQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) - // result: (VSQRTPDMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD512 { + if v_1.Op != OpAMD64VPMOVUSQB128_512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSQRTPDMasked512Merging) + v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) - // result: (VPMOVSQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) + // result: (VPMOVUSQDMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD256 { + if v_1.Op != OpAMD64VPMOVUSQD256 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQDMasked256Merging) + v.reset(OpAMD64VPMOVUSQDMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask) - // result: (VMINPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) + // result: 
(VPMOVUSQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VMINPD512 { + if v_1.Op != OpAMD64VPMOVUSQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMINPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVUSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } // match: (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) @@ -41512,237 +41798,263 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) - // result: (VMAXPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) + // result: (VPOPCNTQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPD512 { + if v_1.Op != OpAMD64VPOPCNTQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMAXPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPOPCNTQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) - // result: (VMULPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) + // result: (VPORQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMULPD512 { + if v_1.Op != OpAMD64VPORQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMULPDMasked512Merging) + v.reset(OpAMD64VPORQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) - // result: (VPORQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) + // result: (VPROLQMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPORQ512 { + if v_1.Op != OpAMD64VPROLQ512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPORQMasked512Merging) - v.AddArg4(dst, x, y, mask) + 
v.reset(OpAMD64VPROLQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) - // result: (VPMOVUSQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) + // result: (VPROLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQW128 { + if v_1.Op != OpAMD64VPROLVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSQWMasked128Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPROLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) - // result: (VREDUCEPDMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) + // result: (VPRORQMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD512 { + if v_1.Op != OpAMD64VPRORQ512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VREDUCEPDMasked512Merging) + v.reset(OpAMD64VPRORQMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) - // result: (VPOPCNTQMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) + // result: (VPRORVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ512 { + if v_1.Op != OpAMD64VPRORVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPRORVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) - // result: (VPXORQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) + // result: (VPSHLDQMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPXORQ512 { + if v_1.Op != OpAMD64VPSHLDQ512 
{ break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPXORQMasked512Merging) + v.reset(OpAMD64VPSHLDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) - // result: (VPMOVQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) + // result: (VPSHRDQMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQD256 { + if v_1.Op != OpAMD64VPSHRDQ512 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVQDMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSHRDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) - // result: (VPMAXUQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) + // result: (VPSLLQMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ512 { + if v_1.Op != OpAMD64VPSLLQ512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSLLQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) - // result: (VSUBPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) + // result: (VPSLLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPD512 { + if v_1.Op != OpAMD64VPSLLVQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSUBPDMasked512Merging) + v.reset(OpAMD64VPSLLVQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) - // result: 
(VPMOVQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) + // result: (VPSRAQMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQW128 { + if v_1.Op != OpAMD64VPSRAQ512const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVQWMasked128Merging) + v.reset(OpAMD64VPSRAQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) - // result: (VPSHRDQMasked512Merging dst [a] x y mask) + // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) + // result: (VPSRAVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ512 { + if v_1.Op != OpAMD64VPSRAVQ512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSRAVQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) - // result: (VPLZCNTQMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) + // result: (VPSRLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ512 { + if v_1.Op != OpAMD64VPSRLVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPLZCNTQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSRLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) - // result: (VSCALEFPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) + // result: (VPSUBQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD512 { + if v_1.Op != OpAMD64VPSUBQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512Merging) + 
v.reset(OpAMD64VPSUBQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) - // result: (VPMOVSQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) + // result: (VPXORQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQW128 { + if v_1.Op != OpAMD64VPXORQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQWMasked128Merging) + v.reset(OpAMD64VPXORQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) + // result: (VRCP14PDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VRCP14PD512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRCP14PDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) - // result: (VPMINUQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) + // result: (VREDUCEPDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUQ512 { + if v_1.Op != OpAMD64VREDUCEPD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VREDUCEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) - // result: (VPMOVSQBMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) + // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQB128 { + if v_1.Op != OpAMD64VRNDSCALEPD512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQBMasked128Merging) + v.reset(OpAMD64VRNDSCALEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) 
return true } @@ -41759,32 +42071,44 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) - // result: (VPSRAQMasked512constMerging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) + // result: (VSCALEFPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ512const { + if v_1.Op != OpAMD64VSCALEFPD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VSCALEFPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) + // result: (VSQRTPDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VSQRTPDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) - // result: (VPMAXSQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) + // result: (VSUBPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ512 { + if v_1.Op != OpAMD64VSUBPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSQMasked512Merging) + v.reset(OpAMD64VSUBPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41817,355 +42141,356 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) - // result: (VPMAXSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask) + // result: (VPABSWMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSW512 { + if v_1.Op != OpAMD64VPABSW512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask 
:= v_2 - v.reset(OpAMD64VPMAXSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPABSWMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) - // result: (VPMULHWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) + // result: (VPADDSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHW512 { + if v_1.Op != OpAMD64VPADDSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULHWMasked512Merging) + v.reset(OpAMD64VPADDSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) - // result: (VPMOVWBMasked256Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) + // result: (VPADDUSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVWB256 { + if v_1.Op != OpAMD64VPADDUSW512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVWBMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPADDUSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) - // result: (VPMADDUBSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) + // result: (VPADDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW512 { + if v_1.Op != OpAMD64VPADDW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked512Merging) + v.reset(OpAMD64VPADDWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) - // result: (VPSHLDWMasked512Merging dst [a] x y mask) + // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) + // result: (VPAVGWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDW512 { + if v_1.Op != OpAMD64VPAVGW512 { break } - a := 
auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPAVGWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) - // result: (VPMULHUWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) + // result: (VPMADDUBSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHUW512 { + if v_1.Op != OpAMD64VPMADDUBSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512Merging) + v.reset(OpAMD64VPMADDUBSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) - // result: (VPMOVUSWBMasked256Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB256 { + if v_1.Op != OpAMD64VPMADDWD512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSWBMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPMADDWDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) - // result: (VPMINSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) + // result: (VPMAXSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSW512 { + if v_1.Op != OpAMD64VPMAXSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSWMasked512Merging) + v.reset(OpAMD64VPMAXSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) - // result: (VPSRAVWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) + // result: (VPMAXUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != 
OpAMD64VPSRAVW512 { + if v_1.Op != OpAMD64VPMAXUW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512Merging) + v.reset(OpAMD64VPMAXUWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) - // result: (VPADDWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) + // result: (VPMINSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDW512 { + if v_1.Op != OpAMD64VPMINSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDWMasked512Merging) + v.reset(OpAMD64VPMINSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) - // result: (VPSHUFHWMasked512Merging dst [a] x mask) + // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) + // result: (VPMINUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW512 { + if v_1.Op != OpAMD64VPMINUW512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHUFHWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPMINUWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) - // result: (VPSHRDWMasked512Merging dst [a] x y mask) + // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) + // result: (VPMOVSWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDW512 { + if v_1.Op != OpAMD64VPMOVSWB256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) - // result: (VPSUBSWMasked512Merging dst x 
y mask) + // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) + // result: (VPMOVUSWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW512 { + if v_1.Op != OpAMD64VPMOVUSWB256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVUSWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) - // result: (VPSUBUSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) + // result: (VPMOVWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW512 { + if v_1.Op != OpAMD64VPMOVWB256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) - // result: (VPSUBWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) + // result: (VPMULHUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW512 { + if v_1.Op != OpAMD64VPMULHUW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBWMasked512Merging) + v.reset(OpAMD64VPMULHUWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) + // result: (VPMULHWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDWD512 { + if v_1.Op != OpAMD64VPMULHW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512Merging) + v.reset(OpAMD64VPMULHWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) - // result: 
(VPSLLVWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) + // result: (VPMULLWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVW512 { + if v_1.Op != OpAMD64VPMULLW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512Merging) + v.reset(OpAMD64VPMULLWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask) - // result: (VPABSWMasked512Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) + // result: (VPOPCNTWMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPABSW512 { + if v_1.Op != OpAMD64VPOPCNTW512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPABSWMasked512Merging) + v.reset(OpAMD64VPOPCNTWMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) - // result: (VPSRAWMasked512constMerging dst [a] x mask) + // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) + // result: (VPSHLDWMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAW512const { + if v_1.Op != OpAMD64VPSHLDW512 { break } a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAWMasked512constMerging) + v.reset(OpAMD64VPSHLDWMasked512Merging) v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) - // result: (VPADDUSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) + // result: (VPSHRDWMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSW512 { + if v_1.Op != OpAMD64VPSHRDW512 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDUSWMasked512Merging) + v.reset(OpAMD64VPSHRDWMasked512Merging) + v.AuxInt = 
uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) - // result: (VPOPCNTWMasked512Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) + // result: (VPSHUFHWMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW512 { + if v_1.Op != OpAMD64VPSHUFHW512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTWMasked512Merging) + v.reset(OpAMD64VPSHUFHWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) - // result: (VPMINUWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) + // result: (VPSLLVWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUW512 { + if v_1.Op != OpAMD64VPSLLVW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUWMasked512Merging) + v.reset(OpAMD64VPSLLVWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) - // result: (VPAVGWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) + // result: (VPSLLWMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW512 { + if v_1.Op != OpAMD64VPSLLW512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPAVGWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSLLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) - // result: (VPMOVSWBMasked256Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) + // result: (VPSRAVWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB256 { + if v_1.Op != OpAMD64VPSRAVW512 { break } + 
y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSWBMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSRAVWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) - // result: (VPMAXUWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) + // result: (VPSRAWMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW512 { + if v_1.Op != OpAMD64VPSRAW512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSRAWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) @@ -42182,46 +42507,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) - // result: (VPSLLWMasked512constMerging dst [a] x mask) + // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) + // result: (VPSUBSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW512const { + if v_1.Op != OpAMD64VPSUBSW512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLWMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSUBSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) - // result: (VPADDSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) + // result: (VPSUBUSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSW512 { + if v_1.Op != OpAMD64VPSUBUSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDSWMasked512Merging) 
+ v.reset(OpAMD64VPSUBUSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) - // result: (VPMULLWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) + // result: (VPSUBWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW512 { + if v_1.Op != OpAMD64VPSUBW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULLWMasked512Merging) + v.reset(OpAMD64VPSUBWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -42232,12 +42556,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask) + // match: (VPBLENDVB128 dst (VADDPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUD128 { + if v_1.Op != OpAMD64VADDPD128 { break } y := v_1.Args[1] @@ -42246,91 +42570,87 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VADDPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask) + // match: (VPBLENDVB128 dst (VADDPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLQ128 { + if v_1.Op != OpAMD64VADDPS128 { break } - a := 
auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VADDPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) + // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW128 { + if v_1.Op != OpAMD64VBROADCASTSD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMADDUBSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VBROADCASTSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask) + // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSB128 { + if v_1.Op != OpAMD64VBROADCASTSD512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + 
v.reset(OpAMD64VBROADCASTSDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) + // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSB128 { + if v_1.Op != OpAMD64VBROADCASTSS128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VBROADCASTSSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask) @@ -42352,12 +42672,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask) + // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW128 { + if v_1.Op != OpAMD64VBROADCASTSS512 { break } x := v_1.Args[0] @@ -42365,18 +42685,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VBROADCASTSSMasked512Merging) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask) + // match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSQ128 { + if v_1.Op != OpAMD64VCVTPS2UDQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2DQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD128 { break } y := v_1.Args[1] @@ -42385,18 +42743,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSQMasked128Merging) + v.reset(OpAMD64VDIVPDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst 
(VPSUBUSW128 x y) mask) + // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW128 { + if v_1.Op != OpAMD64VDIVPS128 { break } y := v_1.Args[1] @@ -42405,56 +42763,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBUSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VDIVPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) + // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ512 { + if v_1.Op != OpAMD64VGF2P8MULB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBQMasked512Merging) + v.reset(OpAMD64VGF2P8MULBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) + // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ256 { + if 
v_1.Op != OpAMD64VMAXPD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VMAXPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask) + // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW128 { + if v_1.Op != OpAMD64VMAXPS128 { break } y := v_1.Args[1] @@ -42463,59 +42823,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VMAXPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VMINPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ128 { + if v_1.Op != OpAMD64VMINPD128 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VMINPDMasked128Merging) v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) + // match: (VPBLENDVB128 dst (VMINPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ256 { + if v_1.Op != OpAMD64VMINPS128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VMINPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask) + // match: (VPBLENDVB128 dst (VMULPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ128 { + if v_1.Op != OpAMD64VMULPD128 { break } y := v_1.Args[1] @@ -42524,37 +42883,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSQMasked128Merging) + v.reset(OpAMD64VMULPDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask) + // match: (VPBLENDVB128 dst (VMULPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: 
(VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW128 { + if v_1.Op != OpAMD64VMULPS128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VMULPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) + // match: (VPBLENDVB128 dst (VPABSB128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW128 { + if v_1.Op != OpAMD64VPABSB128 { break } x := v_1.Args[0] @@ -42562,38 +42922,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPABSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPABSD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD128 { + if v_1.Op != OpAMD64VPABSD128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VPRORVDMasked128Merging) + v.reset(OpAMD64VPABSDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) + // match: (VPBLENDVB128 dst (VPABSQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSD256 { + if v_1.Op != OpAMD64VPABSQ128 { break } x := v_1.Args[0] @@ -42601,18 +42960,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VBROADCASTSDMasked256Merging) + v.reset(OpAMD64VPABSQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) + // match: (VPBLENDVB128 dst (VPABSW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ128 { + if v_1.Op != OpAMD64VPABSW128 { break } x := v_1.Args[0] @@ -42620,31 +42979,30 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPABSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ128const { + if v_1.Op != OpAMD64VPACKSSDW128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPACKSSDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask) @@ -42667,31 +43025,32 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) + // match: (VPBLENDVB128 dst (VPADDB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD128 { + if v_1.Op != OpAMD64VPADDB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUDMasked128Merging 
dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUD128 { + if v_1.Op != OpAMD64VPADDD128 { break } y := v_1.Args[1] @@ -42700,56 +43059,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUDMasked128Merging) + v.reset(OpAMD64VPADDDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask) + // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB128 { + if v_1.Op != OpAMD64VPADDQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask) + // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSD512 { + if v_1.Op != OpAMD64VPADDSB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VBROADCASTSDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VMINPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMINPD128 { + if v_1.Op != OpAMD64VPADDSW128 { break } y := v_1.Args[1] @@ -42758,40 +43119,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMINPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPADDSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDW128 { + if v_1.Op != OpAMD64VPADDUSB128 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDWMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDUSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) 
v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VADDPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPD128 { + if v_1.Op != OpAMD64VPADDUSW128 { break } y := v_1.Args[1] @@ -42800,56 +43159,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPADDUSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask) + // match: (VPBLENDVB128 dst (VPADDW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD256 { + if v_1.Op != OpAMD64VPADDW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWDMasked256Merging) + v.reset(OpAMD64VPADDWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) + // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst 
:= v_0 - if v_1.Op != OpAMD64VPMOVSXWQ256 { + if v_1.Op != OpAMD64VPAVGB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPAVGBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW128 { + if v_1.Op != OpAMD64VPAVGW128 { break } y := v_1.Args[1] @@ -42858,39 +43219,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBSWMasked128Merging) + v.reset(OpAMD64VPAVGWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD128 { + if v_1.Op != OpAMD64VPBROADCASTB128 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VREDUCEPDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + 
v.reset(OpAMD64VPBROADCASTBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBD256 { + if v_1.Op != OpAMD64VPBROADCASTB256 { break } x := v_1.Args[0] @@ -42898,59 +43257,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBDMasked256Merging) + v.reset(OpAMD64VPBROADCASTBMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORQ128 { + if v_1.Op != OpAMD64VPBROADCASTB512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTBMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // 
result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVW128 { + if v_1.Op != OpAMD64VPBROADCASTD128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW256 { + if v_1.Op != OpAMD64VPBROADCASTD256 { break } x := v_1.Args[0] @@ -42958,58 +43314,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSD128 { + if v_1.Op != OpAMD64VPBROADCASTD512 { break } - y 
:= v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSDMasked128Merging) + v.reset(OpAMD64VPBROADCASTDMasked512Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VADDPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPS128 { + if v_1.Op != OpAMD64VPBROADCASTQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD256 { + if v_1.Op != OpAMD64VPBROADCASTQ256 { break } x := v_1.Args[0] @@ -43017,18 +43371,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, 
types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXDQ128 { + if v_1.Op != OpAMD64VPBROADCASTQ512 { break } x := v_1.Args[0] @@ -43036,58 +43390,113 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVD128 { + if v_1.Op != OpAMD64VPBROADCASTW128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVDMasked128Merging) + v.reset(OpAMD64VPBROADCASTWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTW256 { + break + } + x := v_1.Args[0] + 
mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTW512 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTD128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPLZCNTDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ128 { + if v_1.Op != OpAMD64VPLZCNTQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQMasked128Merging) + v.reset(OpAMD64VPLZCNTQMasked128Merging) v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSD128 { + if v_1.Op != OpAMD64VPMADDUBSW128 { break } y := v_1.Args[1] @@ -43096,18 +43505,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUB128 { + if v_1.Op != OpAMD64VPMADDWD128 { break } y := v_1.Args[1] @@ -43116,18 +43525,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMADDWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLQ128 { + if v_1.Op != OpAMD64VPMAXSB128 { break } y := v_1.Args[1] @@ -43136,18 +43545,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMAXSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBD128 { + if v_1.Op != OpAMD64VPMAXSD128 { break } y := v_1.Args[1] @@ -43156,37 +43565,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBDMasked128Merging) + v.reset(OpAMD64VPMAXSDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask) + // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD512 { + if v_1.Op != OpAMD64VPMAXSQ128 { break } + y := v_1.Args[1] x := 
v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMAXSQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDWD128 { + if v_1.Op != OpAMD64VPMAXSW128 { break } y := v_1.Args[1] @@ -43195,60 +43605,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMADDWDMasked128Merging) + v.reset(OpAMD64VPMAXSWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD128 { + if v_1.Op != OpAMD64VPMAXUB128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) 
v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD128const { + if v_1.Op != OpAMD64VPMAXUD128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMAXUDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB128 { + if v_1.Op != OpAMD64VPMAXUQ128 { break } y := v_1.Args[1] @@ -43257,18 +43665,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBUSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSBMasked128Merging dst x y 
(VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSB128 { + if v_1.Op != OpAMD64VPMAXUW128 { break } y := v_1.Args[1] @@ -43277,56 +43685,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDUSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask) + // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW128 { + if v_1.Op != OpAMD64VPMINSB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBWMasked128Merging) + v.reset(OpAMD64VPMINSBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) + // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ256 { + if v_1.Op != OpAMD64VPMINSD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VPMOVZXDQMasked256Merging) + v.reset(OpAMD64VPMINSDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVQ128 { + if v_1.Op != OpAMD64VPMINSQ128 { break } y := v_1.Args[1] @@ -43335,18 +43745,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQMasked128Merging) + v.reset(OpAMD64VPMINSQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDB128 { + if v_1.Op != OpAMD64VPMINSW128 { break } y := v_1.Args[1] @@ -43355,18 +43765,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDBMasked128Merging) + v.reset(OpAMD64VPMINSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM 
<types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMINUB128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDQ128 { + if v_1.Op != OpAMD64VPMINUD128 { break } y := v_1.Args[1] @@ -43375,18 +43805,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDQMasked128Merging) + v.reset(OpAMD64VPMINUDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMINUQ128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMINUWMasked128Merging dst x y 
(VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSW128 { + if v_1.Op != OpAMD64VPMINUW128 { break } y := v_1.Args[1] @@ -43395,18 +43845,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDUSWMasked128Merging) + v.reset(OpAMD64VPMINUWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVDB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB128 { + if v_1.Op != OpAMD64VPMOVDB128_128 { break } x := v_1.Args[0] @@ -43414,140 +43864,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPMOVDW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS128 { + if v_1.Op != OpAMD64VPMOVDW128_128 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VRNDSCALEPSMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVDWMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVQB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUW128 { + if v_1.Op != OpAMD64VPMOVQB128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVQD128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSW128 { + if v_1.Op != OpAMD64VPMOVQD128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQDMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask) + // match: 
(VPBLENDVB128 dst (VPMOVQW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLD128 { + if v_1.Op != OpAMD64VPMOVQW128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFB128 { + if v_1.Op != OpAMD64VPMOVSDB128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != 
OpAMD64VPRORD128 { + if v_1.Op != OpAMD64VPMOVSDW128_128 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVSDWMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ128 { + if v_1.Op != OpAMD64VPMOVSQB128_128 { break } x := v_1.Args[0] @@ -43555,58 +43997,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VCVTTPS2DQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VMINPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMINPS128 { + if v_1.Op != OpAMD64VPMOVSQD128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMINPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSQDMasked128_128Merging) + 
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPD128 { + if v_1.Op != OpAMD64VPMOVSQW128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSUBPDMasked128Merging) + v.reset(OpAMD64VPMOVSQWMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB512 { + if v_1.Op != OpAMD64VPMOVSWB128_128 { break } x := v_1.Args[0] @@ -43614,18 +44054,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTBMasked512Merging) + v.reset(OpAMD64VPMOVSWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBD128 
{ + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXBDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PD128 { + if v_1.Op != OpAMD64VPMOVSXBD256 { break } x := v_1.Args[0] @@ -43633,18 +44092,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD256 { + if v_1.Op != OpAMD64VPMOVSXBD512 { break } x := v_1.Args[0] @@ -43652,18 +44111,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - 
// match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW256 { + if v_1.Op != OpAMD64VPMOVSXBQ128 { break } x := v_1.Args[0] @@ -43671,18 +44130,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD256 { + if v_1.Op != OpAMD64VPMOVSXBQ256 { break } x := v_1.Args[0] @@ -43690,38 +44149,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPADDD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // 
result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDD128 { + if v_1.Op != OpAMD64VPMOVSXBQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS128 { + if v_1.Op != OpAMD64VPMOVSXBW128 { break } x := v_1.Args[0] @@ -43729,7 +44187,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VBROADCASTSSMasked128Merging) + v.reset(OpAMD64VPMOVSXBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBW256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXBWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXDQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXDQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) @@ -43754,12 +44250,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask) + // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD512 { + if v_1.Op != OpAMD64VPMOVSXWD128 { break } x := v_1.Args[0] @@ -43767,102 +44263,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDW128 { + if v_1.Op != OpAMD64VPMOVSXWD256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { 
break } - v.reset(OpAMD64VPSHLDWMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVSXWDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ128 { + if v_1.Op != OpAMD64VPMOVSXWQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXWQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDD128 { + if v_1.Op != OpAMD64VPMOVSXWQ256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVSXWQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWQ512 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSDBMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS128 { + if v_1.Op != OpAMD64VPMOVUSDW128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSUBPSMasked128Merging) + v.reset(OpAMD64VPMOVUSDWMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ128 { + if v_1.Op != OpAMD64VPMOVUSQB128_128 { break } x := v_1.Args[0] @@ -43870,38 +44396,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTQMasked128Merging) + v.reset(OpAMD64VPMOVUSQBMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVQ128 { + if v_1.Op != OpAMD64VPMOVUSQD128_128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQMasked128Merging) + v.reset(OpAMD64VPMOVUSQDMasked128_128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask) + // match: (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS512 { + if v_1.Op != OpAMD64VPMOVUSQW128_128 { break } x := v_1.Args[0] @@ -43909,8 +44434,46 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VBROADCASTSSMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVUSQWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVWB128_128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true @@ -43934,12 +44497,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ128 { + if v_1.Op != OpAMD64VPMOVZXBD256 { break } x := v_1.Args[0] @@ 
-43947,18 +44510,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBQMasked128Merging) + v.reset(OpAMD64VPMOVZXBDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPABSW128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSW128 { + if v_1.Op != OpAMD64VPMOVZXBD512 { break } x := v_1.Args[0] @@ -43966,38 +44529,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXBDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW128 { + if v_1.Op != OpAMD64VPMOVZXBQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + 
v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ128 { + if v_1.Op != OpAMD64VPMOVZXBQ256 { break } x := v_1.Args[0] @@ -44005,38 +44567,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VGF2P8MULB128 { + if v_1.Op != OpAMD64VPMOVZXBQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VGF2P8MULBMasked128Merging) + v.reset(OpAMD64VPMOVZXBQMasked512Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPABSD128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> 
mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSD128 { + if v_1.Op != OpAMD64VPMOVZXBW128 { break } x := v_1.Args[0] @@ -44044,18 +44605,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB256 { + if v_1.Op != OpAMD64VPMOVZXBW256 { break } x := v_1.Args[0] @@ -44063,138 +44624,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTBMasked256Merging) + v.reset(OpAMD64VPMOVZXBWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPD128 { + if v_1.Op != OpAMD64VPMOVZXDQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + 
v.reset(OpAMD64VPMOVZXDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUQ128 { + if v_1.Op != OpAMD64VPMOVZXDQ256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VMULPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMULPS128 { + if v_1.Op != OpAMD64VPMOVZXWD128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMULPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHUW128 { + if v_1.Op != OpAMD64VPMOVZXWD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULHUWMasked128Merging) + v.reset(OpAMD64VPMOVZXWDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VMULPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMULPD128 { + if v_1.Op != OpAMD64VPMOVZXWQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMULPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXWQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) + // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBB128 { + if v_1.Op != OpAMD64VPMOVZXWQ256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXWQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) + // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VCVTPS2UDQ128 { + if v_1.Op != OpAMD64VPMOVZXWQ512 { break } x := v_1.Args[0] @@ -44202,18 +44757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VCVTPS2UDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXWQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPS128 { + if v_1.Op != OpAMD64VPMULHUW128 { break } y := v_1.Args[1] @@ -44222,18 +44777,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSCALEFPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMULHUWMasked128Merging) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ128 { + if v_1.Op != OpAMD64VPMULHW128 { break } y := v_1.Args[1] @@ -44242,37 +44797,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMULHWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) + // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ256 { + if v_1.Op != OpAMD64VPMULLD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMULLDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDW128 x y) mask) + // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDW128 { + if v_1.Op != OpAMD64VPMULLQ128 { break } y := v_1.Args[1] @@ -44281,37 +44837,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMULLQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) + // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD128 { + if v_1.Op != OpAMD64VPMULLW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWDMasked128Merging) + v.reset(OpAMD64VPMULLWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTB128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPOPCNTBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VRSQRT14PD128 
x) mask) + // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PD128 { + if v_1.Op != OpAMD64VPOPCNTD128 { break } x := v_1.Args[0] @@ -44319,101 +44895,98 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRSQRT14PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAW128const { + if v_1.Op != OpAMD64VPOPCNTQ128 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask) + // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM 
<types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHW128 { + if v_1.Op != OpAMD64VPOPCNTW128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULHWMasked128Merging) + v.reset(OpAMD64VPOPCNTWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDD128 { + if v_1.Op != OpAMD64VPROLD128 { break } a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDDMasked128Merging) + v.reset(OpAMD64VPROLDMasked128Merging) v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSB128 { + if v_1.Op != OpAMD64VPROLQ128 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + 
v.reset(OpAMD64VPROLQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSW128 { + if v_1.Op != OpAMD64VPROLVD128 { break } y := v_1.Args[1] @@ -44422,18 +44995,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPROLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSB128 { + if v_1.Op != OpAMD64VPROLVQ128 { break } y := v_1.Args[1] @@ -44442,18 +45015,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPROLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) + // 
match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFD128 { + if v_1.Op != OpAMD64VPRORD128 { break } a := auxIntToUint8(v_1.AuxInt) @@ -44462,215 +45035,232 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFDMasked128Merging) + v.reset(OpAMD64VPRORDMasked128Merging) v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) + // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ512 { + if v_1.Op != OpAMD64VPRORQ128 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTQMasked512Merging) + v.reset(OpAMD64VPRORQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS128 { + if v_1.Op != OpAMD64VPRORVD128 { 
break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VREDUCEPSMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPRORVDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) + // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ512 { + if v_1.Op != OpAMD64VPRORVQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPRORVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVW128 { + if v_1.Op != OpAMD64VPSHLDD128 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSHLDDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + 
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) + // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD128 { + if v_1.Op != OpAMD64VPSHLDQ128 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSQRTPDMasked128Merging) + v.reset(OpAMD64VPSHLDQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW128 { + if v_1.Op != OpAMD64VPSHLDW128 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGWMasked128Merging) + v.reset(OpAMD64VPSHLDWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + 
// result: (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPS128 { + if v_1.Op != OpAMD64VPSHRDD128 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPSMasked128Merging) + v.reset(OpAMD64VPSHRDDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPD128 { + if v_1.Op != OpAMD64VPSHRDQ128 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPDMasked128Merging) + v.reset(OpAMD64VPSHRDQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask) + // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD128 { + if v_1.Op != OpAMD64VPSHRDW128 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked128Merging) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSHRDWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) + // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ256 { + if v_1.Op != OpAMD64VPSHUFB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSHUFBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD128 { + if v_1.Op != OpAMD64VPSHUFD128 { break } a := auxIntToUint8(v_1.AuxInt) @@ -44679,57 +45269,82 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + v.reset(OpAMD64VPSHUFDMasked128Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) 
v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ128 { + if v_1.Op != OpAMD64VPSHUFHW128 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWQMasked128Merging) + v.reset(OpAMD64VPSHUFHWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ128 { + if v_1.Op != OpAMD64VPSLLD128const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked128Merging) + v.reset(OpAMD64VPSLLDMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSLLQ128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSLLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) + // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGB128 { + if v_1.Op != OpAMD64VPSLLVD128 { break } y := v_1.Args[1] @@ -44738,37 +45353,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSLLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ128 { + if v_1.Op != OpAMD64VPSLLVQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSLLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask) + 
// match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSW128 { + if v_1.Op != OpAMD64VPSLLVW128 { break } y := v_1.Args[1] @@ -44777,56 +45393,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSWMasked128Merging) + v.reset(OpAMD64VPSLLVWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask) + // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW256 { + if v_1.Op != OpAMD64VPSLLW128const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSLLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask) + // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != 
OpAMD64VPMOVZXBD512 { + if v_1.Op != OpAMD64VPSRAD128const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSRADMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW128 { + if v_1.Op != OpAMD64VPSRAQ128const { break } a := auxIntToUint8(v_1.AuxInt) @@ -44835,40 +45455,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFHWMasked128Merging) + v.reset(OpAMD64VPSRAQMasked128constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW128const { + if v_1.Op != OpAMD64VPSRAVD128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { 
break } - v.reset(OpAMD64VPSLLWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSRAVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD128 { + if v_1.Op != OpAMD64VPSRAVQ128 { break } y := v_1.Args[1] @@ -44877,18 +45496,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRAVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD128 { + if v_1.Op != OpAMD64VPSRAVW128 { break } y := v_1.Args[1] @@ -44897,37 +45516,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRAVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, 
types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) + // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ512 { + if v_1.Op != OpAMD64VPSRAW128const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWQMasked512Merging) + v.reset(OpAMD64VPSRAWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ128 { + if v_1.Op != OpAMD64VPSRLVD128 { break } y := v_1.Args[1] @@ -44936,31 +45557,30 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: 
(VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD128const { + if v_1.Op != OpAMD64VPSRLVQ128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask) @@ -44983,33 +45603,32 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLQ128const { + if v_1.Op != OpAMD64VPSUBB128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSUBBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: 
(VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD128 { + if v_1.Op != OpAMD64VPSUBD128 { break } y := v_1.Args[1] @@ -45018,75 +45637,78 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked128Merging) + v.reset(OpAMD64VPSUBDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask) + // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD128 { + if v_1.Op != OpAMD64VPSUBQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) + // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ512 { + if v_1.Op != OpAMD64VPSUBSB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBQMasked512Merging) + v.reset(OpAMD64VPSUBSBMasked128Merging) v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ128 { + if v_1.Op != OpAMD64VPSUBSW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW128 { + if v_1.Op != OpAMD64VPSUBUSB128 { break } y := v_1.Args[1] @@ -45095,37 +45717,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKSSDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask) + // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD128 { + if v_1.Op != OpAMD64VPSUBUSW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWDMasked128Merging) + v.reset(OpAMD64VPSUBUSWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ128 { + if v_1.Op != OpAMD64VPSUBW128 { break } y := v_1.Args[1] @@ -45134,18 +45757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSUBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) + // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD128 { + if v_1.Op != OpAMD64VRCP14PD128 { break } x := v_1.Args[0] @@ -45153,99 +45776,102 @@ func 
rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VRCP14PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) + // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPS128 { + if v_1.Op != OpAMD64VREDUCEPD128 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VREDUCEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ128 { + if v_1.Op != OpAMD64VREDUCEPS128 { break } a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked128Merging) + v.reset(OpAMD64VREDUCEPSMasked128Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask) + // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW128 { + if v_1.Op != OpAMD64VRNDSCALEPD128 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPABSB128 x) mask) + // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB128 { + if v_1.Op != OpAMD64VRNDSCALEPS128 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPSMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: 
(VPBLENDVB128 dst (VPABSQ128 x) mask) + // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSQ128 { + if v_1.Op != OpAMD64VRSQRT14PD128 { break } x := v_1.Args[0] @@ -45253,7 +45879,7 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSQMasked128Merging) + v.reset(OpAMD64VRSQRT14PDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) @@ -45279,6 +45905,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } + // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPS128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VSCALEFPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VSQRTPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst 
(VSQRTPS128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask)) @@ -45298,31 +45963,32 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) + // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask)) + // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW512 { + if v_1.Op != OpAMD64VSUBPD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VSUBPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask) + // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) + // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUB128 { + if v_1.Op != OpAMD64VSUBPS128 { break } y := v_1.Args[1] @@ -45331,8 +45997,8 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VSUBPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true @@ -45344,31 +46010,90 @@ 
func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) + // match: (VPBLENDVB256 dst (VADDPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW512 { + if v_1.Op != OpAMD64VADDPD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VADDPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VADDPS256 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VADDPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTPS2UDQ256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // 
match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask) + // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSB256 { + if v_1.Op != OpAMD64VCVTTPS2DQ256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD256 { break } y := v_1.Args[1] @@ -45377,18 +46102,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDUSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VDIVPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VMULPS256 x y) mask) + // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMULPS256 { + if v_1.Op != OpAMD64VDIVPS256 { break } y := v_1.Args[1] @@ -45397,37 +46122,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VMULPSMasked256Merging) + v.reset(OpAMD64VDIVPSMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask) + // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB256 { + if v_1.Op != OpAMD64VGF2P8MULB256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTBMasked256Merging) + v.reset(OpAMD64VGF2P8MULBMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask) + // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS256 { + if v_1.Op != OpAMD64VMAXPD256 { break } y := v_1.Args[1] @@ -45436,18 +46162,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSUBPSMasked256Merging) + v.reset(OpAMD64VMAXPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != 
OpAMD64VMAXPS256 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMAXPSMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask) + // match: (VPBLENDVB256 dst (VMINPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ256 { + if v_1.Op != OpAMD64VMINPD256 { break } y := v_1.Args[1] @@ -45456,39 +46202,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUQMasked256Merging) + v.reset(OpAMD64VMINPDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VMINPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD256 { + if v_1.Op != OpAMD64VMINPS256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VMINPSMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask) + // match: (VPBLENDVB256 dst 
(VMULPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD256 { + if v_1.Op != OpAMD64VMULPD256 { break } y := v_1.Args[1] @@ -45497,18 +46242,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VMULPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) + // match: (VPBLENDVB256 dst (VMULPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPS256 { + if v_1.Op != OpAMD64VMULPS256 { break } y := v_1.Args[1] @@ -45517,18 +46262,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPSMasked256Merging) + v.reset(OpAMD64VMULPSMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) + // match: (VPBLENDVB256 dst (VPABSB256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXDQ512 { + if v_1.Op != OpAMD64VPABSB256 { break } x := v_1.Args[0] @@ 
-45536,18 +46281,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXDQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPABSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSWB128 x) mask) + // match: (VPBLENDVB256 dst (VPABSD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128 { + if v_1.Op != OpAMD64VPABSD256 { break } x := v_1.Args[0] @@ -45555,39 +46300,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVUSWBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPABSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPABSQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ256const { + if v_1.Op != OpAMD64VPABSQ256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPABSQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) + // match: (VPBLENDVB256 dst (VPABSW256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VCVTPS2UDQ256 { + if v_1.Op != OpAMD64VPABSW256 { break } x := v_1.Args[0] @@ -45595,18 +46338,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VCVTPS2UDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPABSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSW256 { + if v_1.Op != OpAMD64VPACKSSDW256 { break } y := v_1.Args[1] @@ -45615,40 +46358,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPACKSSDWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) + // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDDMasked256Merging 
dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDD256 { + if v_1.Op != OpAMD64VPACKUSDW256 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPACKUSDWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVW256 { + if v_1.Op != OpAMD64VPADDB256 { break } y := v_1.Args[1] @@ -45657,18 +46398,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ256 { + if v_1.Op != OpAMD64VPADDD256 { break } y := v_1.Args[1] @@ -45677,18 +46418,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB256 { + if v_1.Op != OpAMD64VPADDQ256 { break } y := v_1.Args[1] @@ -45697,18 +46438,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBUSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPADDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSW256 { + if v_1.Op != OpAMD64VPADDSB256 { break } y := v_1.Args[1] @@ -45717,18 +46458,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) 
v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VMINPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMINPS256 { + if v_1.Op != OpAMD64VPADDSW256 { break } y := v_1.Args[1] @@ -45737,18 +46478,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMINPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSD256 { + if v_1.Op != OpAMD64VPADDUSB256 { break } y := v_1.Args[1] @@ -45757,18 +46498,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDUSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM 
<types.TypeMask> mask)) + // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSW256 { + if v_1.Op != OpAMD64VPADDUSW256 { break } y := v_1.Args[1] @@ -45777,39 +46518,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDSWMasked256Merging) + v.reset(OpAMD64VPADDUSWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPADDW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS256 { + if v_1.Op != OpAMD64VPADDW256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRNDSCALEPSMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVQ256 { + if v_1.Op != OpAMD64VPAVGB256 { break } y := v_1.Args[1] @@ -45818,18 +46558,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v 
*Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPAVGBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMULHW256 x y) mask) + // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHW256 { + if v_1.Op != OpAMD64VPAVGW256 { break } y := v_1.Args[1] @@ -45838,30 +46578,29 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULHWMasked256Merging) + v.reset(OpAMD64VPAVGWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPD256 { + if v_1.Op != OpAMD64VPLZCNTD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPLZCNTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } // match: (VPBLENDVB256 dst 
(VPLZCNTQ256 x) mask) @@ -45883,12 +46622,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD256 { + if v_1.Op != OpAMD64VPMADDUBSW256 { break } y := v_1.Args[1] @@ -45897,18 +46636,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPADDD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDD256 { + if v_1.Op != OpAMD64VPMADDWD256 { break } y := v_1.Args[1] @@ -45917,56 +46656,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMADDWDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSDW128 x) mask) + // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW128 { + if v_1.Op != OpAMD64VPMAXSB256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMAXSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask) + // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD256 { + if v_1.Op != OpAMD64VPMAXSD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked256Merging) + v.reset(OpAMD64VPMAXSDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSW256 { + if v_1.Op != OpAMD64VPMAXSQ256 { break } y := v_1.Args[1] @@ -45975,58 +46716,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v 
*Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDUSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXSQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask) + // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD256 { + if v_1.Op != OpAMD64VPMAXSW256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSQRTPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMAXSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS256 { + if v_1.Op != OpAMD64VPMAXUB256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VREDUCEPSMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, 
types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ256 { + if v_1.Op != OpAMD64VPMAXUD256 { break } y := v_1.Args[1] @@ -46035,37 +46776,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) + // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD512 { + if v_1.Op != OpAMD64VPMAXUQ256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) + // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VGF2P8MULBMasked256Merging dst x y 
(VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VGF2P8MULB256 { + if v_1.Op != OpAMD64VPMAXUW256 { break } y := v_1.Args[1] @@ -46074,18 +46816,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VGF2P8MULBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD256 { + if v_1.Op != OpAMD64VPMINSB256 { break } y := v_1.Args[1] @@ -46094,18 +46836,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMINSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVW256 { + if v_1.Op != OpAMD64VPMINSD256 { break } y := v_1.Args[1] @@ -46114,18 +46856,18 @@ func 
rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMINSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPADDW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDW256 { + if v_1.Op != OpAMD64VPMINSQ256 { break } y := v_1.Args[1] @@ -46134,60 +46876,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMINSQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD256 { + if v_1.Op != OpAMD64VPMINSW256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VREDUCEPDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + 
v.reset(OpAMD64VPMINSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD256 { + if v_1.Op != OpAMD64VPMINUB256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMINUBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD256 { + if v_1.Op != OpAMD64VPMINUD256 { break } y := v_1.Args[1] @@ -46196,99 +46936,96 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVDMasked256Merging) + v.reset(OpAMD64VPMINUDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) + // match: (VPBLENDVB256 dst (VPMINUQ256 x y) 
mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDW256 { + if v_1.Op != OpAMD64VPMINUQ256 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDWMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMINUQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) + // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ256 { + if v_1.Op != OpAMD64VPMINUW256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VCVTTPS2DQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMINUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVDB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPD256 { + 
if v_1.Op != OpAMD64VPMOVDB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSUBPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVDW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBD256 { + if v_1.Op != OpAMD64VPMOVDW128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBDMasked256Merging) + v.reset(OpAMD64VPMOVDWMasked128_256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask) + // match: (VPBLENDVB256 dst (VPMOVQB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPS256 { + if v_1.Op != OpAMD64VPMOVQB128_256 { break } x := v_1.Args[0] @@ -46296,78 +47033,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSQRTPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVQD128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW256 { + if v_1.Op != OpAMD64VPMOVQD128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKUSDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQDMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVQW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLD256 { + if v_1.Op != OpAMD64VPMOVQW128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVQWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPADDB256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDBMasked256Merging dst x y 
(VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDB256 { + if v_1.Op != OpAMD64VPMOVSDB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVWB128 x) mask) + // match: (VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVWB128 { + if v_1.Op != OpAMD64VPMOVSDW128_256 { break } x := v_1.Args[0] @@ -46375,38 +47109,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVWBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSDWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDWD256 { + if v_1.Op != OpAMD64VPMOVSQB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMADDWDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVQD128 x) mask) + // match: (VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQD128 { + if v_1.Op != OpAMD64VPMOVSQD128_256 { break } x := v_1.Args[0] @@ -46414,78 +47147,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVQDMasked128Merging) + v.reset(OpAMD64VPMOVSQDMasked128_256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHUW256 { + if v_1.Op != OpAMD64VPMOVSQW128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULHUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSQWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, 
v0) return true } - // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLQ256 { + if v_1.Op != OpAMD64VPMOVSWB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVD256 { + if v_1.Op != OpAMD64VPMOVSXBW512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVSXBWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSDW128 x) mask) + // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVSXDQMasked512Merging dst x 
(VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW128 { + if v_1.Op != OpAMD64VPMOVSXDQ512 { break } x := v_1.Args[0] @@ -46493,140 +47223,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVUSDWMasked128Merging) + v.reset(OpAMD64VPMOVSXDQMasked512Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW256 { + if v_1.Op != OpAMD64VPMOVSXWD512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLWMasked256Merging) + v.reset(OpAMD64VPMOVSXWDMasked512Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORD256 { + if v_1.Op != OpAMD64VPMOVUSDB128_256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVUSDBMasked128_256Merging) v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVW256 { + if v_1.Op != OpAMD64VPMOVUSDW128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVUSDWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUD256 { + if v_1.Op != OpAMD64VPMOVUSQB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVUSQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFDMasked256Merging 
dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFD256 { + if v_1.Op != OpAMD64VPMOVUSQD128_256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVUSQDMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ256 { + if v_1.Op != OpAMD64VPMOVUSQW128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQMasked256Merging) + v.reset(OpAMD64VPMOVUSQWMasked128_256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSQD128 x) mask) + // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD128 { + if v_1.Op != OpAMD64VPMOVUSWB128_256 { break } x := v_1.Args[0] @@ -46634,38 +47356,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVUSQDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVUSWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW256 { + if v_1.Op != OpAMD64VPMOVWB128_256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBUSWMasked256Merging) + v.reset(OpAMD64VPMOVWBMasked128_256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask) + // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PD256 { + if v_1.Op != OpAMD64VPMOVZXBW512 { break } x := v_1.Args[0] @@ -46673,30 +47394,29 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRSQRT14PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXBWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: 
(VPBLENDVB256 dst (VPADDSB256 x y) mask) + // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSB256 { + if v_1.Op != OpAMD64VPMOVZXDQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMOVZXDQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } // match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask) @@ -46718,33 +47438,32 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLQ256 { + if v_1.Op != OpAMD64VPMULHUW256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMULHUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask) + // match: (VPBLENDVB256 dst (VPMULHW256 x y) 
mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGB256 { + if v_1.Op != OpAMD64VPMULHW256 { break } y := v_1.Args[1] @@ -46753,18 +47472,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMULHWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVQ256 { + if v_1.Op != OpAMD64VPMULLD256 { break } y := v_1.Args[1] @@ -46773,37 +47492,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMULLDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) + // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != 
OpAMD64VPMOVZXDQ512 { + if v_1.Op != OpAMD64VPMULLQ256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXDQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPMULLQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask) + // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUB256 { + if v_1.Op != OpAMD64VPMULLW256 { break } y := v_1.Args[1] @@ -46812,119 +47532,115 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMULLWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW256const { + if v_1.Op != OpAMD64VPOPCNTB256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPS256 { + if v_1.Op != OpAMD64VPOPCNTD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSCALEFPSMasked256Merging) + v.reset(OpAMD64VPOPCNTDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLQ256const { + if v_1.Op != OpAMD64VPOPCNTQ256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPOPCNTQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask) + // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSBMasked256Merging dst x y 
(VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSB256 { + if v_1.Op != OpAMD64VPOPCNTW256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPABSQ256 x) mask) + // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSQ256 { + if v_1.Op != OpAMD64VPROLD256 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPROLDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW256 { + if v_1.Op != OpAMD64VPROLQ256 { break } a := auxIntToUint8(v_1.AuxInt) @@ -46933,19 +47649,19 @@ func 
rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFHWMasked256Merging) + v.reset(OpAMD64VPROLQMasked256Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) + // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBB256 { + if v_1.Op != OpAMD64VPROLVD256 { break } y := v_1.Args[1] @@ -46954,18 +47670,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPROLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPS256 { + if v_1.Op != OpAMD64VPROLVQ256 { break } y := v_1.Args[1] @@ -46974,58 +47690,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + 
v.reset(OpAMD64VPROLVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask) + // match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSD256 { + if v_1.Op != OpAMD64VPRORD256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSDMasked256Merging) + v.reset(OpAMD64VPRORDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VMULPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMULPD256 { + if v_1.Op != OpAMD64VPRORQ256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMULPDMasked256Merging) + v.reset(OpAMD64VPRORQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPS256 { + if v_1.Op != OpAMD64VPRORVD256 { break } y := v_1.Args[1] @@ -47034,18 +47752,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPSMasked256Merging) + v.reset(OpAMD64VPRORVDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ256 { + if v_1.Op != OpAMD64VPRORVQ256 { break } y := v_1.Args[1] @@ -47054,28 +47772,30 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSQMasked256Merging) + v.reset(OpAMD64VPRORVQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VMINPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMINPD256 { + if v_1.Op != OpAMD64VPSHLDD256 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VMINPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSHLDDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true @@ -47102,90 +47822,100 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD256 { + if v_1.Op != OpAMD64VPSHLDW256 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSCALEFPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSHLDWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSWB128 x) mask) + // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB128 { + if v_1.Op != OpAMD64VPSHRDD256 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSWBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + 
v.reset(OpAMD64VPSHRDDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSQ256 { + if v_1.Op != OpAMD64VPSHRDQ256 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSQMasked256Merging) + v.reset(OpAMD64VPSHRDQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPABSD256 x) mask) + // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSD256 { + if v_1.Op != OpAMD64VPSHRDW256 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSHRDWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask) + // match: (VPBLENDVB256 
dst (VPSHUFB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUW256 { + if v_1.Op != OpAMD64VPSHUFB256 { break } y := v_1.Args[1] @@ -47194,79 +47924,102 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPSHUFBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) + // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDW256 { + if v_1.Op != OpAMD64VPSHUFD256 { break } a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDWMasked256Merging) + v.reset(OpAMD64VPSHUFDMasked256Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) + // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPSHUFHWMasked256Merging dst [a] x 
(VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW512 { + if v_1.Op != OpAMD64VPSHUFHW256 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPSHUFHWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask) + // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUD256 { + if v_1.Op != OpAMD64VPSLLD256const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUDMasked256Merging) + v.reset(OpAMD64VPSLLDMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask) + // match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSB256 { + if v_1.Op != OpAMD64VPSLLQ256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + 
v.reset(OpAMD64VPSLLQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVD256 { break } y := v_1.Args[1] @@ -47275,40 +48028,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPSLLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) + // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ256 { + if v_1.Op != OpAMD64VPSLLVQ256 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSLLVQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSLLVWMasked256Merging dst x y 
(VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW256 { + if v_1.Op != OpAMD64VPSLLVW256 { break } y := v_1.Args[1] @@ -47317,18 +48068,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMADDUBSWMasked256Merging) + v.reset(OpAMD64VPSLLVWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD256const { + if v_1.Op != OpAMD64VPSLLW256const { break } a := auxIntToUint8(v_1.AuxInt) @@ -47337,80 +48088,81 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked256constMerging) + v.reset(OpAMD64VPSLLWMasked256constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUQ256 { + if v_1.Op != OpAMD64VPSRAD256const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRADMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask) + // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PD256 { + if v_1.Op != OpAMD64VPSRAQ256const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PDMasked256Merging) + v.reset(OpAMD64VPSRAQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) + // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDD256 { + if v_1.Op != OpAMD64VPSRAVD256 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSRAVDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // 
match: (VPBLENDVB256 dst (VPADDQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDQ256 { + if v_1.Op != OpAMD64VPSRAVQ256 { break } y := v_1.Args[1] @@ -47419,18 +48171,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDQMasked256Merging) + v.reset(OpAMD64VPSRAVQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask) + // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUB256 { + if v_1.Op != OpAMD64VPSRAVW256 { break } y := v_1.Args[1] @@ -47439,18 +48191,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPSRAVWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst 
:= v_0 - if v_1.Op != OpAMD64VPRORQ256 { + if v_1.Op != OpAMD64VPSRAW256const { break } a := auxIntToUint8(v_1.AuxInt) @@ -47459,19 +48211,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQMasked256Merging) + v.reset(OpAMD64VPSRAWMasked256constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VADDPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPD256 { + if v_1.Op != OpAMD64VPSRLVD256 { break } y := v_1.Args[1] @@ -47480,18 +48232,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask) + // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFB256 { + if v_1.Op != OpAMD64VPSRLVQ256 { break } y := v_1.Args[1] @@ -47500,39 +48252,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { 
break } - v.reset(OpAMD64VPSHUFBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD256const { + if v_1.Op != OpAMD64VPSRLVW256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW256 { + if v_1.Op != OpAMD64VPSUBB256 { break } y := v_1.Args[1] @@ -47541,58 +48292,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) 
v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAW256const { + if v_1.Op != OpAMD64VPSUBD256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPABSW256 x) mask) + // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSW256 { + if v_1.Op != OpAMD64VPSUBQ256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked256Merging dst x y 
(VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW256 { + if v_1.Op != OpAMD64VPSUBSB256 { break } y := v_1.Args[1] @@ -47601,56 +48352,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKSSDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSQD128 x) mask) + // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD128 { + if v_1.Op != OpAMD64VPSUBSW256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSQDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask) + // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD256 { + if v_1.Op != OpAMD64VPSUBUSB256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPD256 { + if v_1.Op != OpAMD64VPSUBUSW256 { break } y := v_1.Args[1] @@ -47659,18 +48412,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW256 { + if v_1.Op != OpAMD64VPSUBW256 { break } y := v_1.Args[1] @@ -47679,18 +48432,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGWMasked256Merging) + v.reset(OpAMD64VPSUBWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return 
true } - // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask) + // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ256 { + if v_1.Op != OpAMD64VRCP14PD256 { break } x := v_1.Args[0] @@ -47698,77 +48451,121 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked256Merging) + v.reset(OpAMD64VRCP14PDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) + // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW256 { + if v_1.Op != OpAMD64VREDUCEPD256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VREDUCEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask) + // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VREDUCEPSMasked256Merging dst 
[a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW256 { + if v_1.Op != OpAMD64VREDUCEPS256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VREDUCEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask) + // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask)) + // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW256 { + if v_1.Op != OpAMD64VRNDSCALEPD256 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ256 { + if v_1.Op != OpAMD64VRNDSCALEPS256 { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VRNDSCALEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PD256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPD256 { break } y := v_1.Args[1] @@ -47777,37 +48574,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked256Merging) + v.reset(OpAMD64VSCALEFPDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPABSB256 x) mask) + // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB256 { + if v_1.Op != OpAMD64VSCALEFPS256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSBMasked256Merging) 
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VSCALEFPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VSQRTPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVDW128 x) mask) + // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) + // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDW128 { + if v_1.Op != OpAMD64VSQRTPS256 { break } x := v_1.Args[0] @@ -47815,18 +48632,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVDWMasked128Merging) + v.reset(OpAMD64VSQRTPSMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask) + // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) + // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSB256 { + if v_1.Op != OpAMD64VSUBPD256 { break } y := v_1.Args[1] @@ -47835,8 +48652,28 
@@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VSUBPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VSUBPS256 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VSUBPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true |
