From aa80d7a7e6bf97aa27a74cc5056ef270a2a0c2f4 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 24 Feb 2026 22:34:44 +0000 Subject: cmd/compile, simd/archsimd: add VPSRL immediate peepholes Before this CL, simdgen contained a sign check to selectively enable such rules for deduplication purposes. This left out `VPSRL` as it's only available in unsigned form. This CL fixes that. It looks like the previous documentation fix to the SHA instruction might not have run go generate, so this CL also contains the generated code for that fix. There is also a weird phantom import in cmd/compile/internal/ssa/issue77582_test.go. This CL also fixes that. The trybot didn't complain? Change-Id: Ibbf9f789c1a67af1474f0285ab376bc07f17667e Reviewed-on: https://go-review.googlesource.com/c/go/+/748501 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 36 ++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 675 ++++++++++++++++++++++ 2 files changed, 711 insertions(+) (limited to 'src/cmd/compile/internal') diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 63b37f99cb..861629b249 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1067,14 +1067,23 @@ (ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) (VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x) (ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) +(VPSRLW128 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x) (ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(VPSRLW256 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x) (ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) +(VPSRLW512 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x) (ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) +(VPSRLD128 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x) (ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) 
+(VPSRLD256 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x) (ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) +(VPSRLD512 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x) (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) +(VPSRLQ128 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x) (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) +(VPSRLQ256 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x) (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) +(VPSRLQ512 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x) (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...) (ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...) (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...) @@ -1102,6 +1111,15 @@ (VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask) (VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask) (VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask) +(VPSRLWMasked128 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x mask) +(VPSRLWMasked256 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x mask) +(VPSRLWMasked512 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x mask) +(VPSRLDMasked128 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x mask) +(VPSRLDMasked256 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x mask) +(VPSRLDMasked512 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x mask) +(VPSRLQMasked128 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x mask) +(VPSRLQMasked256 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x mask) +(VPSRLQMasked512 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x mask) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) 
@@ -1960,6 +1978,15 @@ (VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) => (VPSLLQMasked128const [a] x mask) (VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) => (VPSLLQMasked256const [a] x mask) (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512const [a] x mask) +(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) => (VPSRLWMasked128const [a] x mask) +(VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) => (VPSRLWMasked256const [a] x mask) +(VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) => (VPSRLWMasked512const [a] x mask) +(VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) => (VPSRLDMasked128const [a] x mask) +(VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) => (VPSRLDMasked256const [a] x mask) +(VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) => (VPSRLDMasked512const [a] x mask) +(VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) => (VPSRLQMasked128const [a] x mask) +(VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) => (VPSRLQMasked256const [a] x mask) +(VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512const [a] x mask) (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) => (VPSRAWMasked128const [a] x mask) (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) => (VPSRAWMasked256const [a] x mask) (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask) @@ -2024,6 +2051,7 @@ (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) => (VPSRLDMasked512constMerging dst [a] x mask) (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask) @@ -2078,6 
+2106,7 @@ (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask) (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512constMerging dst [a] x mask) (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) @@ -2115,6 +2144,7 @@ (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) => (VPSRLWMasked512constMerging dst [a] x mask) (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) @@ -2288,9 +2318,12 @@ (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSRLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSRLQ128const [a] x) 
mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSRLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) @@ -2440,9 +2473,12 @@ (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) +(VPBLENDVB256 dst (VPSRLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPSRLVD256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) +(VPBLENDVB256 dst (VPSRLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) (VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) (VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) (VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index dc0a3abb8b..bc66003811 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1724,20 +1724,44 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) case OpAMD64VPSRAWMasked512: return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) + case OpAMD64VPSRLD128: + return rewriteValueAMD64_OpAMD64VPSRLD128(v) + case OpAMD64VPSRLD256: + return rewriteValueAMD64_OpAMD64VPSRLD256(v) + case OpAMD64VPSRLD512: + return rewriteValueAMD64_OpAMD64VPSRLD512(v) case OpAMD64VPSRLD512const: return rewriteValueAMD64_OpAMD64VPSRLD512const(v) + case OpAMD64VPSRLDMasked128: + return rewriteValueAMD64_OpAMD64VPSRLDMasked128(v) case OpAMD64VPSRLDMasked128const: return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v) + case OpAMD64VPSRLDMasked256: + return rewriteValueAMD64_OpAMD64VPSRLDMasked256(v) case 
OpAMD64VPSRLDMasked256const: return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v) + case OpAMD64VPSRLDMasked512: + return rewriteValueAMD64_OpAMD64VPSRLDMasked512(v) case OpAMD64VPSRLDMasked512const: return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v) + case OpAMD64VPSRLQ128: + return rewriteValueAMD64_OpAMD64VPSRLQ128(v) + case OpAMD64VPSRLQ256: + return rewriteValueAMD64_OpAMD64VPSRLQ256(v) + case OpAMD64VPSRLQ512: + return rewriteValueAMD64_OpAMD64VPSRLQ512(v) case OpAMD64VPSRLQ512const: return rewriteValueAMD64_OpAMD64VPSRLQ512const(v) + case OpAMD64VPSRLQMasked128: + return rewriteValueAMD64_OpAMD64VPSRLQMasked128(v) case OpAMD64VPSRLQMasked128const: return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v) + case OpAMD64VPSRLQMasked256: + return rewriteValueAMD64_OpAMD64VPSRLQMasked256(v) case OpAMD64VPSRLQMasked256const: return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v) + case OpAMD64VPSRLQMasked512: + return rewriteValueAMD64_OpAMD64VPSRLQMasked512(v) case OpAMD64VPSRLQMasked512const: return rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v) case OpAMD64VPSRLVD512: @@ -1756,6 +1780,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v) case OpAMD64VPSRLVQMasked512: return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v) + case OpAMD64VPSRLW128: + return rewriteValueAMD64_OpAMD64VPSRLW128(v) + case OpAMD64VPSRLW256: + return rewriteValueAMD64_OpAMD64VPSRLW256(v) + case OpAMD64VPSRLW512: + return rewriteValueAMD64_OpAMD64VPSRLW512(v) + case OpAMD64VPSRLWMasked128: + return rewriteValueAMD64_OpAMD64VPSRLWMasked128(v) + case OpAMD64VPSRLWMasked256: + return rewriteValueAMD64_OpAMD64VPSRLWMasked256(v) + case OpAMD64VPSRLWMasked512: + return rewriteValueAMD64_OpAMD64VPSRLWMasked512(v) case OpAMD64VPSUBD512: return rewriteValueAMD64_OpAMD64VPSUBD512(v) case OpAMD64VPSUBDMasked128: @@ -33642,6 +33678,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: 
(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) + // result: (VPSRLWMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) // result: (VPSRAWMasked128const [a] x mask) for { @@ -34230,6 +34280,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) + // result: (VPSRLWMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) // result: (VPSRAWMasked256const [a] x mask) for { @@ -34746,6 +34810,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) // result: (VPSRAWMasked512const [a] x mask) for { @@ -35505,6 +35583,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) + // result: (VPSRLDMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = 
uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask) // result: (VPSRADMasked128const [a] x mask) for { @@ -36386,6 +36478,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) + // result: (VPSRLDMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask) // result: (VPSRADMasked256const [a] x mask) for { @@ -37271,6 +37377,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) // result: (VPSRADMasked512const [a] x mask) for { @@ -38134,6 +38254,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) + // result: (VPSRLQMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) // result: (VPSRAQMasked128const [a] x mask) for { @@ -39011,6 +39145,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, 
mask) return true } + // match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) + // result: (VPSRLQMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) // result: (VPSRAQMasked256const [a] x mask) for { @@ -39808,6 +39956,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) // result: (VPSRAQMasked512const [a] x mask) for { @@ -43835,6 +43997,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD512const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) // result: (VPSRLVDMasked512Merging dst x y mask) for { @@ -44606,6 +44783,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ512const { + break + } 
+ a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) // result: (VPSRLVQMasked512Merging dst x y mask) for { @@ -45155,6 +45347,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW512const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) // result: (VPSUBSWMasked512Merging dst x y mask) for { @@ -48538,6 +48745,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLDMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) @@ -48598,6 +48847,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } + // match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) @@ -51560,6 +51830,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLDMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + 
v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) @@ -51620,6 +51932,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } + // match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLWMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) @@ -61380,6 +61713,60 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD128 x (MOVQconst [c])) + // result: (VPSRLD128const [uint8(c)] x) + for { + x := v_0 + if 
v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD256 x (MOVQconst [c])) + // result: (VPSRLD256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD512 x (MOVQconst [c])) + // result: (VPSRLD512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) @@ -61406,6 +61793,26 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61434,6 +61841,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool { + v_2 
:= v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61462,6 +61889,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61490,6 +61937,60 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ128 x (MOVQconst [c])) + // result: (VPSRLQ128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ256 x (MOVQconst [c])) + // result: (VPSRLQ256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + 
v.reset(OpAMD64VPSRLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ512 x (MOVQconst [c])) + // result: (VPSRLQ512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) @@ -61516,6 +62017,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61544,6 +62065,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { v_1 
:= v.Args[1] v_0 := v.Args[0] @@ -61572,6 +62113,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61828,6 +62389,120 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW128 x (MOVQconst [c])) + // result: (VPSRLW128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW256 x (MOVQconst [c])) + // result: (VPSRLW256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW512 x (MOVQconst [c])) + // result: (VPSRLW512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + 
} + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] -- cgit v1.3-6-g1900