diff options
Diffstat (limited to 'src/cmd/compile')
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 36 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 675 |
2 files changed, 711 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 63b37f99cb..861629b249 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1067,14 +1067,23 @@ (ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) (VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x) (ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) +(VPSRLW128 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x) (ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(VPSRLW256 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x) (ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) +(VPSRLW512 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x) (ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) +(VPSRLD128 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x) (ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(VPSRLD256 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x) (ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) +(VPSRLD512 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x) (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) +(VPSRLQ128 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x) (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) +(VPSRLQ256 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x) (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) +(VPSRLQ512 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x) (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...) (ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...) (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...) @@ -1102,6 +1111,15 @@ (VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask) (VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask) (VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask) +(VPSRLWMasked128 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x mask) +(VPSRLWMasked256 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x mask) +(VPSRLWMasked512 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x mask) +(VPSRLDMasked128 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x mask) +(VPSRLDMasked256 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x mask) +(VPSRLDMasked512 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x mask) +(VPSRLQMasked128 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x mask) +(VPSRLQMasked256 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x mask) +(VPSRLQMasked512 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x mask) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1960,6 +1978,15 @@ (VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) => (VPSLLQMasked128const [a] x mask) (VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) => (VPSLLQMasked256const [a] x mask) (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512const [a] x mask) +(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) => (VPSRLWMasked128const [a] x mask) +(VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) => (VPSRLWMasked256const [a] x mask) +(VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) => (VPSRLWMasked512const [a] x mask) +(VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) => (VPSRLDMasked128const [a] x mask) +(VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) => (VPSRLDMasked256const [a] x mask) +(VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) => (VPSRLDMasked512const [a] x mask) +(VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) => (VPSRLQMasked128const [a] x mask) +(VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) => (VPSRLQMasked256const [a] x mask) +(VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512const [a] x mask) (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) => (VPSRAWMasked128const [a] x mask) (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) => (VPSRAWMasked256const [a] x mask) (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask) @@ -2024,6 +2051,7 @@ (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) => (VPSRLDMasked512constMerging dst [a] x mask) (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask) @@ -2078,6 +2106,7 @@ (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask) (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512constMerging dst [a] x mask) (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) @@ -2115,6 +2144,7 @@ (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) => (VPSRLWMasked512constMerging dst [a] x mask) (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) @@ -2288,9 +2318,12 @@ (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask)) +(VPBLENDVB128 dst (VPSRLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) (VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask)) @@ -2440,9 +2473,12 @@ (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask)) +(VPBLENDVB256 dst (VPSRLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) (VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask)) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index dc0a3abb8b..bc66003811 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1724,20 +1724,44 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) case OpAMD64VPSRAWMasked512: return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) + case OpAMD64VPSRLD128: + return rewriteValueAMD64_OpAMD64VPSRLD128(v) + case OpAMD64VPSRLD256: + return rewriteValueAMD64_OpAMD64VPSRLD256(v) + case OpAMD64VPSRLD512: + return rewriteValueAMD64_OpAMD64VPSRLD512(v) case OpAMD64VPSRLD512const: return rewriteValueAMD64_OpAMD64VPSRLD512const(v) + case OpAMD64VPSRLDMasked128: + return rewriteValueAMD64_OpAMD64VPSRLDMasked128(v) case OpAMD64VPSRLDMasked128const: return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v) + case OpAMD64VPSRLDMasked256: + return rewriteValueAMD64_OpAMD64VPSRLDMasked256(v) case OpAMD64VPSRLDMasked256const: return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v) + case OpAMD64VPSRLDMasked512: + return rewriteValueAMD64_OpAMD64VPSRLDMasked512(v) case OpAMD64VPSRLDMasked512const: return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v) + case OpAMD64VPSRLQ128: + return rewriteValueAMD64_OpAMD64VPSRLQ128(v) + case OpAMD64VPSRLQ256: + return rewriteValueAMD64_OpAMD64VPSRLQ256(v) + case OpAMD64VPSRLQ512: + return rewriteValueAMD64_OpAMD64VPSRLQ512(v) case OpAMD64VPSRLQ512const: return rewriteValueAMD64_OpAMD64VPSRLQ512const(v) + case OpAMD64VPSRLQMasked128: + return rewriteValueAMD64_OpAMD64VPSRLQMasked128(v) case OpAMD64VPSRLQMasked128const: return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v) + case OpAMD64VPSRLQMasked256: + return rewriteValueAMD64_OpAMD64VPSRLQMasked256(v) case OpAMD64VPSRLQMasked256const: return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v) + case OpAMD64VPSRLQMasked512: + return rewriteValueAMD64_OpAMD64VPSRLQMasked512(v) case OpAMD64VPSRLQMasked512const: return rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v) case OpAMD64VPSRLVD512: @@ -1756,6 +1780,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v) case OpAMD64VPSRLVQMasked512: return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v) + case OpAMD64VPSRLW128: + return rewriteValueAMD64_OpAMD64VPSRLW128(v) + case OpAMD64VPSRLW256: + return rewriteValueAMD64_OpAMD64VPSRLW256(v) + case OpAMD64VPSRLW512: + return rewriteValueAMD64_OpAMD64VPSRLW512(v) + case OpAMD64VPSRLWMasked128: + return rewriteValueAMD64_OpAMD64VPSRLWMasked128(v) + case OpAMD64VPSRLWMasked256: + return rewriteValueAMD64_OpAMD64VPSRLWMasked256(v) + case OpAMD64VPSRLWMasked512: + return rewriteValueAMD64_OpAMD64VPSRLWMasked512(v) case OpAMD64VPSUBD512: return rewriteValueAMD64_OpAMD64VPSUBD512(v) case OpAMD64VPSUBDMasked128: @@ -33642,6 +33678,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) + // result: (VPSRLWMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) // result: (VPSRAWMasked128const [a] x mask) for { @@ -34230,6 +34280,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) + // result: (VPSRLWMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) // result: (VPSRAWMasked256const [a] x mask) for { @@ -34746,6 +34810,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) // result: (VPSRAWMasked512const [a] x mask) for { @@ -35505,6 +35583,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) + // result: (VPSRLDMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask) // result: (VPSRADMasked128const [a] x mask) for { @@ -36386,6 +36478,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) + // result: (VPSRLDMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask) // result: (VPSRADMasked256const [a] x mask) for { @@ -37271,6 +37377,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) // result: (VPSRADMasked512const [a] x mask) for { @@ -38134,6 +38254,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) + // result: (VPSRLQMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ128const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) // result: (VPSRAQMasked128const [a] x mask) for { @@ -39011,6 +39145,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) + // result: (VPSRLQMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ256const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) // result: (VPSRAQMasked256const [a] x mask) for { @@ -39808,6 +39956,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) // result: (VPSRAQMasked512const [a] x mask) for { @@ -43835,6 +43997,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD512const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) // result: (VPSRLVDMasked512Merging dst x y mask) for { @@ -44606,6 +44783,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ512const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) // result: (VPSRLVQMasked512Merging dst x y mask) for { @@ -45155,6 +45347,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW512const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) // result: (VPSUBSWMasked512Merging dst x y mask) for { @@ -48538,6 +48745,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLDMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask)) @@ -48598,6 +48847,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } + // match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask)) @@ -51560,6 +51830,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg3(dst, x, v0) return true } + // match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLDMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask)) @@ -51620,6 +51932,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } + // match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW256const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRLWMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask)) @@ -61380,6 +61713,60 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD128 x (MOVQconst [c])) + // result: (VPSRLD128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD256 x (MOVQconst [c])) + // result: (VPSRLD256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLD512 x (MOVQconst [c])) + // result: (VPSRLD512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) @@ -61406,6 +61793,26 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61434,6 +61841,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61462,6 +61889,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61490,6 +61937,60 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ128 x (MOVQconst [c])) + // result: (VPSRLQ128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ256 x (MOVQconst [c])) + // result: (VPSRLQ256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQ512 x (MOVQconst [c])) + // result: (VPSRLQ512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { v_0 := v.Args[0] // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) @@ -61516,6 +62017,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61544,6 +62065,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61572,6 +62113,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -61828,6 +62389,120 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW128 x (MOVQconst [c])) + // result: (VPSRLW128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW256 x (MOVQconst [c])) + // result: (VPSRLW256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLW512 x (MOVQconst [c])) + // result: (VPSRLW512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] |
