Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/AMD64.rules')
 -rw-r--r--   src/cmd/compile/internal/ssa/gen/AMD64.rules | 1164
 1 file changed, 1164 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
new file mode 100644
index 0000000000..167ec82d18
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -0,0 +1,1164 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// x86 register conventions:
+//  - Integer types live in the low portion of registers. Upper portions are junk.
+//  - Boolean types use the low-order byte of a register. Upper bytes are junk.
+//  - We do not use AH,BH,CH,DH registers.
+//  - Floating-point types will live in the low natural slot of an sse2 register.
+//    Unused portions are junk.
+
+// Lowering arithmetic
+(Add64 x y) -> (ADDQ x y)
+(AddPtr x y) -> (ADDQ x y)
+(Add32 x y) -> (ADDL x y)
+(Add16 x y) -> (ADDW x y)
+(Add8 x y) -> (ADDB x y)
+(Add32F x y) -> (ADDSS x y)
+(Add64F x y) -> (ADDSD x y)
+
+(Sub64 x y) -> (SUBQ x y)
+(SubPtr x y) -> (SUBQ x y)
+(Sub32 x y) -> (SUBL x y)
+(Sub16 x y) -> (SUBW x y)
+(Sub8 x y) -> (SUBB x y)
+(Sub32F x y) -> (SUBSS x y)
+(Sub64F x y) -> (SUBSD x y)
+
+(Mul64 x y) -> (MULQ x y)
+(Mul32 x y) -> (MULL x y)
+(Mul16 x y) -> (MULW x y)
+(Mul8 x y) -> (MULB x y)
+(Mul32F x y) -> (MULSS x y)
+(Mul64F x y) -> (MULSD x y)
+
+(Div32F x y) -> (DIVSS x y)
+(Div64F x y) -> (DIVSD x y)
+
+(Div64 x y) -> (DIVQ x y)
+(Div64u x y) -> (DIVQU x y)
+(Div32 x y) -> (DIVL x y)
+(Div32u x y) -> (DIVLU x y)
+(Div16 x y) -> (DIVW x y)
+(Div16u x y) -> (DIVWU x y)
+(Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y))
+(Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
+
+(Hmul64 x y) -> (HMULQ x y)
+(Hmul64u x y) -> (HMULQU x y)
+(Hmul32 x y) -> (HMULL x y)
+(Hmul32u x y) -> (HMULLU x y)
+(Hmul16 x y) -> (HMULW x y)
+(Hmul16u x y) -> (HMULWU x y)
+(Hmul8 x y) -> (HMULB x y)
+(Hmul8u x y) -> (HMULBU x y)
+
+(Avg64u x y) -> (AVGQU x y)
+
+(Mod64 x y) -> (MODQ x y)
+(Mod64u x y) -> (MODQU x y)
+(Mod32 x y) -> (MODL x y)
+(Mod32u x y) -> (MODLU x y)
+(Mod16 x y) -> (MODW x y)
+(Mod16u x y) -> (MODWU x y)
+(Mod8 x y) -> (MODW (SignExt8to16 x) (SignExt8to16 y))
+(Mod8u x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
+
+(And64 x y) -> (ANDQ x y)
+(And32 x y) -> (ANDL x y)
+(And16 x y) -> (ANDW x y)
+(And8 x y) -> (ANDB x y)
+
+(Or64 x y) -> (ORQ x y)
+(Or32 x y) -> (ORL x y)
+(Or16 x y) -> (ORW x y)
+(Or8 x y) -> (ORB x y)
+
+(Xor64 x y) -> (XORQ x y)
+(Xor32 x y) -> (XORL x y)
+(Xor16 x y) -> (XORW x y)
+(Xor8 x y) -> (XORB x y)
+
+(Neg64 x) -> (NEGQ x)
+(Neg32 x) -> (NEGL x)
+(Neg16 x) -> (NEGW x)
+(Neg8 x) -> (NEGB x)
+(Neg32F x) -> (PXOR x (MOVSSconst <config.Frontend().TypeFloat32()> [f2i(math.Copysign(0, -1))]))
+(Neg64F x) -> (PXOR x (MOVSDconst <config.Frontend().TypeFloat64()> [f2i(math.Copysign(0, -1))]))
+
+(Com64 x) -> (NOTQ x)
+(Com32 x) -> (NOTL x)
+(Com16 x) -> (NOTW x)
+(Com8 x) -> (NOTB x)
+
+(Sqrt x) -> (SQRTSD x)
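The Neg32F/Neg64F rules above negate by XORing in a constant whose bits are f2i(math.Copysign(0, -1)), i.e. the bit pattern of -0.0, so only the sign bit flips. A minimal Go sketch of the same bit trick (illustrative names, not compiler code):

    package main

    import (
        "fmt"
        "math"
    )

    // neg64F flips only the sign bit, which is what PXOR with the bit
    // pattern of -0.0 computes. Unlike 0-x, it is also correct for ±0.
    func neg64F(x float64) float64 {
        signBit := math.Float64bits(math.Copysign(0, -1)) // 0x8000000000000000
        return math.Float64frombits(math.Float64bits(x) ^ signBit)
    }

    func main() {
        fmt.Println(neg64F(1.5), neg64F(0)) // -1.5 -0
    }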
+
+// Note: we always extend to 64 bits even though some ops don't need that many result bits.
+(SignExt8to16 x) -> (MOVBQSX x)
+(SignExt8to32 x) -> (MOVBQSX x)
+(SignExt8to64 x) -> (MOVBQSX x)
+(SignExt16to32 x) -> (MOVWQSX x)
+(SignExt16to64 x) -> (MOVWQSX x)
+(SignExt32to64 x) -> (MOVLQSX x)
+
+(ZeroExt8to16 x) -> (MOVBQZX x)
+(ZeroExt8to32 x) -> (MOVBQZX x)
+(ZeroExt8to64 x) -> (MOVBQZX x)
+(ZeroExt16to32 x) -> (MOVWQZX x)
+(ZeroExt16to64 x) -> (MOVWQZX x)
+(ZeroExt32to64 x) -> (MOVLQZX x)
+
+(Cvt32to32F x) -> (CVTSL2SS x)
+(Cvt32to64F x) -> (CVTSL2SD x)
+(Cvt64to32F x) -> (CVTSQ2SS x)
+(Cvt64to64F x) -> (CVTSQ2SD x)
+
+(Cvt32Fto32 x) -> (CVTTSS2SL x)
+(Cvt32Fto64 x) -> (CVTTSS2SQ x)
+(Cvt64Fto32 x) -> (CVTTSD2SL x)
+(Cvt64Fto64 x) -> (CVTTSD2SQ x)
+
+(Cvt32Fto64F x) -> (CVTSS2SD x)
+(Cvt64Fto32F x) -> (CVTSD2SS x)
+
+// Because we ignore high parts of registers, truncates are just copies.
+(Trunc16to8 x) -> x
+(Trunc32to8 x) -> x
+(Trunc32to16 x) -> x
+(Trunc64to8 x) -> x
+(Trunc64to16 x) -> x
+(Trunc64to32 x) -> x
+
+// Lowering shifts
+// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
+//   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
+// Note: for small shifts we generate 32 bits of mask even when we don't need it all.
+(Lsh64x64 <t> x y) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
+(Lsh64x32 <t> x y) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
+(Lsh64x16 <t> x y) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
+(Lsh64x8 <t> x y) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
+
+(Lsh32x64 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
+(Lsh32x32 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
+(Lsh32x16 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+(Lsh32x8 <t> x y) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
+
+(Lsh16x64 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
+(Lsh16x32 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
+(Lsh16x16 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
+(Lsh16x8 <t> x y) -> (ANDW (SHLW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
+
+(Lsh8x64 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
+(Lsh8x32 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
+(Lsh8x16 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
+(Lsh8x8 <t> x y) -> (ANDB (SHLB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
+
+(Lrot64 <t> x [c]) -> (ROLQconst <t> [c&63] x)
+(Lrot32 <t> x [c]) -> (ROLLconst <t> [c&31] x)
+(Lrot16 <t> x [c]) -> (ROLWconst <t> [c&15] x)
+(Lrot8 <t> x [c]) -> (ROLBconst <t> [c&7] x)
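The Lsh rules above (and the unsigned Rsh rules that follow) get the required "result is 0 once the count reaches the operand width" semantics without a branch: SBBxcarrymask materializes the carry from the CMP as a mask of all ones (count in range) or all zeros (count out of range). A plain-Go sketch of the computation (illustrative, not compiler code):

    // shl64 mirrors (Lsh64x64 x y): SHLQ itself only uses the low 6 bits of
    // the count, so its result is ANDed with the SBBQcarrymask value.
    func shl64(x, y uint64) uint64 {
        var mask uint64
        if y < 64 { // CMPQconst y [64] sets the carry; SBB turns it into 0 or ^0
            mask = ^uint64(0)
        }
        return (x << (y & 63)) & mask
    }

The signed right shifts below cannot use a mask (they must produce 0 or -1), so they instead force an out-of-range count to all ones, which the hardware masks back down to the maximum in-range shift.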
+
+(Rsh64Ux64 <t> x y) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
+(Rsh64Ux32 <t> x y) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
+(Rsh64Ux16 <t> x y) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
+(Rsh64Ux8 <t> x y) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
+
+(Rsh32Ux64 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
+(Rsh32Ux32 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
+(Rsh32Ux16 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+(Rsh32Ux8 <t> x y) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
+
+(Rsh16Ux64 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
+(Rsh16Ux32 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
+(Rsh16Ux16 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
+(Rsh16Ux8 <t> x y) -> (ANDW (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
+
+(Rsh8Ux64 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
+(Rsh8Ux32 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
+(Rsh8Ux16 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
+(Rsh8Ux8 <t> x y) -> (ANDB (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
+
+// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
+// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
+// Note: for small shift widths we generate 32 bits of mask even when we don't need it all.
+(Rsh64x64 <t> x y) -> (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64])))))
+(Rsh64x32 <t> x y) -> (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64])))))
+(Rsh64x16 <t> x y) -> (SARQ <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64])))))
+(Rsh64x8 <t> x y) -> (SARQ <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
+
+(Rsh32x64 <t> x y) -> (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32])))))
+(Rsh32x32 <t> x y) -> (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
+(Rsh32x16 <t> x y) -> (SARL <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
+(Rsh32x8 <t> x y) -> (SARL <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
+
+(Rsh16x64 <t> x y) -> (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16])))))
+(Rsh16x32 <t> x y) -> (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
+(Rsh16x16 <t> x y) -> (SARW <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
+(Rsh16x8 <t> x y) -> (SARW <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
+
+(Rsh8x64 <t> x y) -> (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8])))))
+(Rsh8x32 <t> x y) -> (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
+(Rsh8x16 <t> x y) -> (SARB <t> x (ORW <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
+(Rsh8x8 <t> x y) -> (SARB <t> x (ORB <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
+
+(Less64 x y) -> (SETL (CMPQ x y))
+(Less32 x y) -> (SETL (CMPL x y))
+(Less16 x y) -> (SETL (CMPW x y))
+(Less8 x y) -> (SETL (CMPB x y))
+(Less64U x y) -> (SETB (CMPQ x y))
+(Less32U x y) -> (SETB (CMPL x y))
+(Less16U x y) -> (SETB (CMPW x y))
+(Less8U x y) -> (SETB (CMPB x y))
+// Use SETGF with reversed operands to dodge NaN case
+(Less64F x y) -> (SETGF (UCOMISD y x))
+(Less32F x y) -> (SETGF (UCOMISS y x))
+
+(Leq64 x y) -> (SETLE (CMPQ x y))
+(Leq32 x y) -> (SETLE (CMPL x y))
+(Leq16 x y) -> (SETLE (CMPW x y))
+(Leq8 x y) -> (SETLE (CMPB x y))
+(Leq64U x y) -> (SETBE (CMPQ x y))
+(Leq32U x y) -> (SETBE (CMPL x y))
+(Leq16U x y) -> (SETBE (CMPW x y))
+(Leq8U x y) -> (SETBE (CMPB x y))
+// Use SETGEF with reversed operands to dodge NaN case
+(Leq64F x y) -> (SETGEF (UCOMISD y x))
+(Leq32F x y) -> (SETGEF (UCOMISS y x))
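Less64F and Leq64F swap their operands so the comparison can use SETGF/SETGEF, "above"-style conditions on UCOMISD's flags that come out false when either operand is NaN, with no extra parity-flag fixup. The identity being used, in plain Go (illustrative):

    // less64F mirrors (Less64F x y) -> (SETGF (UCOMISD y x)): "x < y" is
    // evaluated as "y > x", and both are false when either operand is NaN,
    // which is exactly what Go requires of <.
    func less64F(x, y float64) bool {
        return y > x
    }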
+
+(Greater64 x y) -> (SETG (CMPQ x y))
+(Greater32 x y) -> (SETG (CMPL x y))
+(Greater16 x y) -> (SETG (CMPW x y))
+(Greater8 x y) -> (SETG (CMPB x y))
+(Greater64U x y) -> (SETA (CMPQ x y))
+(Greater32U x y) -> (SETA (CMPL x y))
+(Greater16U x y) -> (SETA (CMPW x y))
+(Greater8U x y) -> (SETA (CMPB x y))
+// Note: the Go assembler gets the UCOMISx operand order wrong, but it is right here.
+// The bug is accommodated at generation of assembly language.
+(Greater64F x y) -> (SETGF (UCOMISD x y))
+(Greater32F x y) -> (SETGF (UCOMISS x y))
+
+(Geq64 x y) -> (SETGE (CMPQ x y))
+(Geq32 x y) -> (SETGE (CMPL x y))
+(Geq16 x y) -> (SETGE (CMPW x y))
+(Geq8 x y) -> (SETGE (CMPB x y))
+(Geq64U x y) -> (SETAE (CMPQ x y))
+(Geq32U x y) -> (SETAE (CMPL x y))
+(Geq16U x y) -> (SETAE (CMPW x y))
+(Geq8U x y) -> (SETAE (CMPB x y))
+// Note: the Go assembler gets the UCOMISx operand order wrong, but it is right here.
+// The bug is accommodated at generation of assembly language.
+(Geq64F x y) -> (SETGEF (UCOMISD x y))
+(Geq32F x y) -> (SETGEF (UCOMISS x y))
+
+(Eq64 x y) -> (SETEQ (CMPQ x y))
+(Eq32 x y) -> (SETEQ (CMPL x y))
+(Eq16 x y) -> (SETEQ (CMPW x y))
+(Eq8 x y) -> (SETEQ (CMPB x y))
+(EqPtr x y) -> (SETEQ (CMPQ x y))
+(Eq64F x y) -> (SETEQF (UCOMISD x y))
+(Eq32F x y) -> (SETEQF (UCOMISS x y))
+
+(Neq64 x y) -> (SETNE (CMPQ x y))
+(Neq32 x y) -> (SETNE (CMPL x y))
+(Neq16 x y) -> (SETNE (CMPW x y))
+(Neq8 x y) -> (SETNE (CMPB x y))
+(NeqPtr x y) -> (SETNE (CMPQ x y))
+(Neq64F x y) -> (SETNEF (UCOMISD x y))
+(Neq32F x y) -> (SETNEF (UCOMISS x y))
+
+(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
+(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
+(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
+(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
+(Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
+(Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)
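The Eq64F/Neq64F rules above cannot use plain SETEQ/SETNE because UCOMISD reports NaN operands as unordered through the parity flag; the SETEQF/SETNEF pseudo-ops fold that in (the disabled rules further down spell the expansion out as equal-and-ordered / not-equal-or-unordered). The contract they must satisfy, stated in Go:

    // Equality is false and inequality is true whenever either operand is NaN.
    func eq64F(x, y float64) bool  { return x == y } // SETEQF: EQ && ordered
    func neq64F(x, y float64) bool { return x != y } // SETNEF: NE || unordered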
+
+// These more-specific FP versions of the Store pattern should come first.
+(Store [8] ptr val mem) && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
+(Store [4] ptr val mem) && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)
+
+(Store [8] ptr val mem) -> (MOVQstore ptr val mem)
+(Store [4] ptr val mem) -> (MOVLstore ptr val mem)
+(Store [2] ptr val mem) -> (MOVWstore ptr val mem)
+(Store [1] ptr val mem) -> (MOVBstore ptr val mem)
+
+// We want this to stick out so the to/from ptr conversion is obvious
+(Convert <t> x mem) -> (MOVQconvert <t> x mem)
+
+// checks
+(IsNonNil p) -> (SETNE (TESTQ p p))
+(IsInBounds idx len) -> (SETB (CMPQ idx len))
+(IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
+(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
+
+(GetG mem) -> (LoweredGetG mem)
+(GetClosurePtr) -> (LoweredGetClosurePtr)
+
+// Small moves
+(Move [0] _ _ mem) -> mem
+(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
+(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
+(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
+(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
+(Move [16] dst src mem) -> (MOVOstore dst (MOVOload src mem) mem)
+(Move [3] dst src mem) ->
+    (MOVBstore [2] dst (MOVBload [2] src mem)
+        (MOVWstore dst (MOVWload src mem) mem))
+(Move [5] dst src mem) ->
+    (MOVBstore [4] dst (MOVBload [4] src mem)
+        (MOVLstore dst (MOVLload src mem) mem))
+(Move [6] dst src mem) ->
+    (MOVWstore [4] dst (MOVWload [4] src mem)
+        (MOVLstore dst (MOVLload src mem) mem))
+(Move [7] dst src mem) ->
+    (MOVLstore [3] dst (MOVLload [3] src mem)
+        (MOVLstore dst (MOVLload src mem) mem))
+(Move [size] dst src mem) && size > 8 && size < 16 ->
+    (MOVQstore [size-8] dst (MOVQload [size-8] src mem)
+        (MOVQstore dst (MOVQload src mem) mem))
+
+// Adjust moves to be a multiple of 16 bytes.
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 <= 8 ->
+    (Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
+        (MOVQstore dst (MOVQload src mem) mem))
+(Move [size] dst src mem) && size > 16 && size%16 != 0 && size%16 > 8 ->
+    (Move [size-size%16] (ADDQconst <dst.Type> dst [size%16]) (ADDQconst <src.Type> src [size%16])
+        (MOVOstore dst (MOVOload src mem) mem))
+
+// Medium copying uses a duff device.
+(Move [size] dst src mem) && size >= 32 && size <= 16*64 && size%16 == 0 ->
+    (DUFFCOPY [14*(64-size/16)] dst src mem)
+// 14 and 64 are magic constants. 14 is the number of bytes to encode:
+//     MOVUPS  (SI), X0
+//     ADDQ    $16, SI
+//     MOVUPS  X0, (DI)
+//     ADDQ    $16, DI
+// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
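The DUFFCOPY auxint selects an entry point into the runtime's duffcopy routine. Given the two magic constants just described, the offset arithmetic can be sanity-checked with a short sketch (hypothetical helper name, for illustration):

    // duffCopyOffset computes 14*(64-size/16): a copy of size bytes (a
    // multiple of 16) needs size/16 of the 64 blocks, and each skipped
    // block is 14 bytes of code, so this lands on the first block to run.
    func duffCopyOffset(size int64) int64 {
        blocks := size / 16
        return 14 * (64 - blocks)
    }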
+
+// Large copying uses REP MOVSQ.
+(Move [size] dst src mem) && size > 16*64 && size%8 == 0 ->
+    (REPMOVSQ dst src (MOVQconst [size/8]) mem)
+
+(Not x) -> (XORBconst [1] x)
+
+(OffPtr [off] ptr) -> (ADDQconst [off] ptr)
+
+(Const8 [val]) -> (MOVBconst [val])
+(Const16 [val]) -> (MOVWconst [val])
+(Const32 [val]) -> (MOVLconst [val])
+(Const64 [val]) -> (MOVQconst [val])
+(Const32F [val]) -> (MOVSSconst [val])
+(Const64F [val]) -> (MOVSDconst [val])
+(ConstNil) -> (MOVQconst [0])
+(ConstBool [b]) -> (MOVBconst [b])
+
+(Addr {sym} base) -> (LEAQ {sym} base)
+
+(ITab (Load ptr mem)) -> (MOVQload ptr mem)
+
+// block rewrites
+(If (SETL cmp) yes no) -> (LT cmp yes no)
+(If (SETLE cmp) yes no) -> (LE cmp yes no)
+(If (SETG cmp) yes no) -> (GT cmp yes no)
+(If (SETGE cmp) yes no) -> (GE cmp yes no)
+(If (SETEQ cmp) yes no) -> (EQ cmp yes no)
+(If (SETNE cmp) yes no) -> (NE cmp yes no)
+(If (SETB cmp) yes no) -> (ULT cmp yes no)
+(If (SETBE cmp) yes no) -> (ULE cmp yes no)
+(If (SETA cmp) yes no) -> (UGT cmp yes no)
+(If (SETAE cmp) yes no) -> (UGE cmp yes no)
+
+// Special case for floating point - LF/LEF not generated
+(If (SETGF cmp) yes no) -> (UGT cmp yes no)
+(If (SETGEF cmp) yes no) -> (UGE cmp yes no)
+(If (SETEQF cmp) yes no) -> (EQF cmp yes no)
+(If (SETNEF cmp) yes no) -> (NEF cmp yes no)
+
+(If cond yes no) -> (NE (TESTB cond cond) yes no)
+
+(NE (TESTB (SETL cmp)) yes no) -> (LT cmp yes no)
+(NE (TESTB (SETLE cmp)) yes no) -> (LE cmp yes no)
+(NE (TESTB (SETG cmp)) yes no) -> (GT cmp yes no)
+(NE (TESTB (SETGE cmp)) yes no) -> (GE cmp yes no)
+(NE (TESTB (SETEQ cmp)) yes no) -> (EQ cmp yes no)
+(NE (TESTB (SETNE cmp)) yes no) -> (NE cmp yes no)
+(NE (TESTB (SETB cmp)) yes no) -> (ULT cmp yes no)
+(NE (TESTB (SETBE cmp)) yes no) -> (ULE cmp yes no)
+(NE (TESTB (SETA cmp)) yes no) -> (UGT cmp yes no)
+(NE (TESTB (SETAE cmp)) yes no) -> (UGE cmp yes no)
+
+// Special case for floating point - LF/LEF not generated
+(NE (TESTB (SETGF cmp)) yes no) -> (UGT cmp yes no)
+(NE (TESTB (SETGEF cmp)) yes no) -> (UGE cmp yes no)
+(NE (TESTB (SETEQF cmp)) yes no) -> (EQF cmp yes no)
+(NE (TESTB (SETNEF cmp)) yes no) -> (NEF cmp yes no)
+
+// Disabled because it interferes with the pattern match above and makes worse code.
+// (SETNEF x) -> (ORQ (SETNE <config.Frontend().TypeInt8()> x) (SETNAN <config.Frontend().TypeInt8()> x))
+// (SETEQF x) -> (ANDQ (SETEQ <config.Frontend().TypeInt8()> x) (SETORD <config.Frontend().TypeInt8()> x))
+
+(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
+(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
+(DeferCall [argwid] mem) -> (CALLdefer [argwid] mem)
+(GoCall [argwid] mem) -> (CALLgo [argwid] mem)
+(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
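The catch-all (If cond yes no) rule above handles a boolean that is not a freshly computed SETxx: by the register conventions at the top of the file, only cond's low byte is meaningful, so TESTB cond,cond sets the flags from that byte and the block branches on NE; when cond did come from a SETxx, the (NE (TESTB (SETxx cmp))) rules cancel the round trip again. The byte-level invariant, sketched in Go:

    // A boolean occupies the low byte of its register as exactly 0 or 1,
    // so "low byte nonzero" (TESTB; JNE) and "true" coincide.
    func branchOnBool(cond uint8) bool {
        return cond&cond != 0 // what TESTB cond, cond tests; same as cond != 0
    }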
+
+// Rules below here apply some simple optimizations after lowering.
+// TODO: Should this be a separate pass?
+
+// fold constants into instructions
+(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
+(ADDQ (MOVQconst [c]) x) && is32Bit(c) -> (ADDQconst [c] x)
+(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
+(ADDL (MOVLconst [c]) x) -> (ADDLconst [c] x)
+(ADDW x (MOVWconst [c])) -> (ADDWconst [c] x)
+(ADDW (MOVWconst [c]) x) -> (ADDWconst [c] x)
+(ADDB x (MOVBconst [c])) -> (ADDBconst [c] x)
+(ADDB (MOVBconst [c]) x) -> (ADDBconst [c] x)
+
+(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
+(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
+(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
+(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
+(SUBW x (MOVWconst [c])) -> (SUBWconst x [c])
+(SUBW (MOVWconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [c]))
+(SUBB x (MOVBconst [c])) -> (SUBBconst x [c])
+(SUBB (MOVBconst [c]) x) -> (NEGB (SUBBconst <v.Type> x [c]))
+
+(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
+(MULQ (MOVQconst [c]) x) && is32Bit(c) -> (MULQconst [c] x)
+(MULL x (MOVLconst [c])) -> (MULLconst [c] x)
+(MULL (MOVLconst [c]) x) -> (MULLconst [c] x)
+(MULW x (MOVWconst [c])) -> (MULWconst [c] x)
+(MULW (MOVWconst [c]) x) -> (MULWconst [c] x)
+(MULB x (MOVBconst [c])) -> (MULBconst [c] x)
+(MULB (MOVBconst [c]) x) -> (MULBconst [c] x)
+
+(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
+(ANDQ (MOVQconst [c]) x) && is32Bit(c) -> (ANDQconst [c] x)
+(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
+(ANDL (MOVLconst [c]) x) -> (ANDLconst [c] x)
+(ANDW x (MOVLconst [c])) -> (ANDWconst [c] x)
+(ANDW (MOVLconst [c]) x) -> (ANDWconst [c] x)
+(ANDW x (MOVWconst [c])) -> (ANDWconst [c] x)
+(ANDW (MOVWconst [c]) x) -> (ANDWconst [c] x)
+(ANDB x (MOVLconst [c])) -> (ANDBconst [c] x)
+(ANDB (MOVLconst [c]) x) -> (ANDBconst [c] x)
+(ANDB x (MOVBconst [c])) -> (ANDBconst [c] x)
+(ANDB (MOVBconst [c]) x) -> (ANDBconst [c] x)
+
+(ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
+(ORQ (MOVQconst [c]) x) && is32Bit(c) -> (ORQconst [c] x)
+(ORL x (MOVLconst [c])) -> (ORLconst [c] x)
+(ORL (MOVLconst [c]) x) -> (ORLconst [c] x)
+(ORW x (MOVWconst [c])) -> (ORWconst [c] x)
+(ORW (MOVWconst [c]) x) -> (ORWconst [c] x)
+(ORB x (MOVBconst [c])) -> (ORBconst [c] x)
+(ORB (MOVBconst [c]) x) -> (ORBconst [c] x)
+
+(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
+(XORQ (MOVQconst [c]) x) && is32Bit(c) -> (XORQconst [c] x)
+(XORL x (MOVLconst [c])) -> (XORLconst [c] x)
+(XORL (MOVLconst [c]) x) -> (XORLconst [c] x)
+(XORW x (MOVWconst [c])) -> (XORWconst [c] x)
+(XORW (MOVWconst [c]) x) -> (XORWconst [c] x)
+(XORB x (MOVBconst [c])) -> (XORBconst [c] x)
+(XORB (MOVBconst [c]) x) -> (XORBconst [c] x)
+
+(SHLQ x (MOVQconst [c])) -> (SHLQconst [c&63] x)
+(SHLQ x (MOVLconst [c])) -> (SHLQconst [c&63] x)
+(SHLQ x (MOVWconst [c])) -> (SHLQconst [c&63] x)
+(SHLQ x (MOVBconst [c])) -> (SHLQconst [c&63] x)
+
+(SHLL x (MOVQconst [c])) -> (SHLLconst [c&31] x)
+(SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
+(SHLL x (MOVWconst [c])) -> (SHLLconst [c&31] x)
+(SHLL x (MOVBconst [c])) -> (SHLLconst [c&31] x)
+
+(SHLW x (MOVQconst [c])) -> (SHLWconst [c&31] x)
+(SHLW x (MOVLconst [c])) -> (SHLWconst [c&31] x)
+(SHLW x (MOVWconst [c])) -> (SHLWconst [c&31] x)
+(SHLW x (MOVBconst [c])) -> (SHLWconst [c&31] x)
+
+(SHLB x (MOVQconst [c])) -> (SHLBconst [c&31] x)
+(SHLB x (MOVLconst [c])) -> (SHLBconst [c&31] x)
+(SHLB x (MOVWconst [c])) -> (SHLBconst [c&31] x)
+(SHLB x (MOVBconst [c])) -> (SHLBconst [c&31] x)
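The is32Bit(c) guards earlier in this group exist because x86-64 ALU immediates are at most 32 bits and are sign-extended to 64, so a 64-bit constant can only be folded into an ADDQconst-style op if it survives that round trip. This mirrors the helper in the ssa package's rewrite.go; as a sketch:

    // is32Bit reports whether n fits in a sign-extended 32-bit immediate.
    func is32Bit(n int64) bool {
        return n == int64(int32(n))
    }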
+
+(SHRQ x (MOVQconst [c])) -> (SHRQconst [c&63] x)
+(SHRQ x (MOVLconst [c])) -> (SHRQconst [c&63] x)
+(SHRQ x (MOVWconst [c])) -> (SHRQconst [c&63] x)
+(SHRQ x (MOVBconst [c])) -> (SHRQconst [c&63] x)
+
+(SHRL x (MOVQconst [c])) -> (SHRLconst [c&31] x)
+(SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
+(SHRL x (MOVWconst [c])) -> (SHRLconst [c&31] x)
+(SHRL x (MOVBconst [c])) -> (SHRLconst [c&31] x)
+
+(SHRW x (MOVQconst [c])) -> (SHRWconst [c&31] x)
+(SHRW x (MOVLconst [c])) -> (SHRWconst [c&31] x)
+(SHRW x (MOVWconst [c])) -> (SHRWconst [c&31] x)
+(SHRW x (MOVBconst [c])) -> (SHRWconst [c&31] x)
+
+(SHRB x (MOVQconst [c])) -> (SHRBconst [c&31] x)
+(SHRB x (MOVLconst [c])) -> (SHRBconst [c&31] x)
+(SHRB x (MOVWconst [c])) -> (SHRBconst [c&31] x)
+(SHRB x (MOVBconst [c])) -> (SHRBconst [c&31] x)
+
+(SARQ x (MOVQconst [c])) -> (SARQconst [c&63] x)
+(SARQ x (MOVLconst [c])) -> (SARQconst [c&63] x)
+(SARQ x (MOVWconst [c])) -> (SARQconst [c&63] x)
+(SARQ x (MOVBconst [c])) -> (SARQconst [c&63] x)
+
+(SARL x (MOVQconst [c])) -> (SARLconst [c&31] x)
+(SARL x (MOVLconst [c])) -> (SARLconst [c&31] x)
+(SARL x (MOVWconst [c])) -> (SARLconst [c&31] x)
+(SARL x (MOVBconst [c])) -> (SARLconst [c&31] x)
+
+(SARW x (MOVQconst [c])) -> (SARWconst [c&31] x)
+(SARW x (MOVLconst [c])) -> (SARWconst [c&31] x)
+(SARW x (MOVWconst [c])) -> (SARWconst [c&31] x)
+(SARW x (MOVBconst [c])) -> (SARWconst [c&31] x)
+
+(SARB x (MOVQconst [c])) -> (SARBconst [c&31] x)
+(SARB x (MOVLconst [c])) -> (SARBconst [c&31] x)
+(SARB x (MOVWconst [c])) -> (SARBconst [c&31] x)
+(SARB x (MOVBconst [c])) -> (SARBconst [c&31] x)
+
+// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
+// because the x86 instructions are defined to use all 5 bits of the shift even
+// for the small shifts. I don't think we'll ever generate a weird shift (e.g.
+// (SHLW x (MOVWconst [24]))), but just in case.
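A quick Go check of the point made in the comment above: 8-, 16-, and 32-bit shifts consume five bits of count, so a count like 24 must really zero a 16-bit value rather than being reduced mod 16 (illustrative function name):

    // shlwConst mirrors (SHLW x (MOVWconst [c])) -> (SHLWconst [c&31] x).
    // Keeping 5 bits preserves out-of-range counts: x<<24 on a uint16 is 0,
    // matching what the hardware does with the masked count.
    func shlwConst(x uint16, c uint64) uint16 {
        return x << (c & 31)
    }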
+
+(CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
+(CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
+(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
+(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
+(CMPW x (MOVWconst [c])) -> (CMPWconst x [c])
+(CMPW (MOVWconst [c]) x) -> (InvertFlags (CMPWconst x [c]))
+(CMPB x (MOVBconst [c])) -> (CMPBconst x [c])
+(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
+
+// strength reduction
+(MULQconst [-1] x) -> (NEGQ x)
+(MULQconst [0] _) -> (MOVQconst [0])
+(MULQconst [1] x) -> x
+(MULQconst [3] x) -> (LEAQ2 x x)
+(MULQconst [5] x) -> (LEAQ4 x x)
+(MULQconst [9] x) -> (LEAQ8 x x)
+(MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
+
+// combine add/shift into LEAQ
+(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
+(ADDQ x (SHLQconst [2] y)) -> (LEAQ4 x y)
+(ADDQ x (SHLQconst [1] y)) -> (LEAQ2 x y)
+(ADDQ x (ADDQ y y)) -> (LEAQ2 x y)
+(ADDQ x (ADDQ x y)) -> (LEAQ2 y x)
+(ADDQ x (ADDQ y x)) -> (LEAQ2 y x)
+
+// combine ADDQ/ADDQconst into LEAQ1
+(ADDQconst [c] (ADDQ x y)) -> (LEAQ1 [c] x y)
+(ADDQ (ADDQconst [c] x) y) -> (LEAQ1 [c] x y)
+(ADDQ x (ADDQconst [c] y)) -> (LEAQ1 [c] x y)
+
+// fold ADDQ into LEAQ
+(ADDQconst [c] (LEAQ [d] {s} x)) -> (LEAQ [c+d] {s} x)
+(LEAQ [c] {s} (ADDQconst [d] x)) -> (LEAQ [c+d] {s} x)
+(LEAQ [c] {s} (ADDQ x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
+(ADDQ x (LEAQ [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
+(ADDQ (LEAQ [c] {s} x) y) && x.Op != OpSB && y.Op != OpSB -> (LEAQ1 [c] {s} x y)
+
+// fold ADDQconst into leaqX
+(ADDQconst [c] (LEAQ1 [d] {s} x y)) -> (LEAQ1 [c+d] {s} x y)
+(ADDQconst [c] (LEAQ2 [d] {s} x y)) -> (LEAQ2 [c+d] {s} x y)
+(ADDQconst [c] (LEAQ4 [d] {s} x y)) -> (LEAQ4 [c+d] {s} x y)
+(ADDQconst [c] (LEAQ8 [d] {s} x y)) -> (LEAQ8 [c+d] {s} x y)
+(LEAQ1 [c] {s} (ADDQconst [d] x) y) && x.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
+(LEAQ1 [c] {s} x (ADDQconst [d] y)) && y.Op != OpSB -> (LEAQ1 [c+d] {s} x y)
+(LEAQ2 [c] {s} (ADDQconst [d] x) y) && x.Op != OpSB -> (LEAQ2 [c+d] {s} x y)
+(LEAQ2 [c] {s} x (ADDQconst [d] y)) && y.Op != OpSB -> (LEAQ2 [c+2*d] {s} x y)
+(LEAQ4 [c] {s} (ADDQconst [d] x) y) && x.Op != OpSB -> (LEAQ4 [c+d] {s} x y)
+(LEAQ4 [c] {s} x (ADDQconst [d] y)) && y.Op != OpSB -> (LEAQ4 [c+4*d] {s} x y)
+(LEAQ8 [c] {s} (ADDQconst [d] x) y) && x.Op != OpSB -> (LEAQ8 [c+d] {s} x y)
+(LEAQ8 [c] {s} x (ADDQconst [d] y)) && y.Op != OpSB -> (LEAQ8 [c+8*d] {s} x y)
+
+// reverse ordering of compare instruction
+(SETL (InvertFlags x)) -> (SETG x)
+(SETG (InvertFlags x)) -> (SETL x)
+(SETB (InvertFlags x)) -> (SETA x)
+(SETA (InvertFlags x)) -> (SETB x)
+(SETLE (InvertFlags x)) -> (SETGE x)
+(SETGE (InvertFlags x)) -> (SETLE x)
+(SETBE (InvertFlags x)) -> (SETAE x)
+(SETAE (InvertFlags x)) -> (SETBE x)
+(SETEQ (InvertFlags x)) -> (SETEQ x)
+(SETNE (InvertFlags x)) -> (SETNE x)
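The strength-reduction rules above lean on LEA's base + scale*index addressing form: LEAQ2 x x computes x+2*x in one instruction, and likewise for scales 4 and 8. The identities in plain Go (illustrative names):

    func mul3(x int64) int64 { return x + 2*x } // (MULQconst [3] x) -> (LEAQ2 x x)
    func mul5(x int64) int64 { return x + 4*x } // (MULQconst [5] x) -> (LEAQ4 x x)
    func mul9(x int64) int64 { return x + 8*x } // (MULQconst [9] x) -> (LEAQ8 x x)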
+
+// sign extended loads
+// Note: The combined instruction must end up in the same block
+// as the original load. If not, we end up making a value with
+// memory type live in two different blocks, which can lead to
+// multiple memory values alive simultaneously.
+(MOVBQSX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX (MOVBload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVBQZXload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX (MOVWload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX (MOVWload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVWQZXload <v.Type> [off] {sym} ptr mem)
+(MOVLQSX (MOVLload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+(MOVLQZX (MOVLload [off] {sym} ptr mem)) -> @v.Args[0].Block (MOVLQZXload <v.Type> [off] {sym} ptr mem)
+
+// replace load from same location as preceding store with copy
+(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+
+// Fold extensions and ANDs together.
+(MOVBQZX (ANDBconst [c] x)) -> (ANDQconst [c & 0xff] x)
+(MOVWQZX (ANDWconst [c] x)) -> (ANDQconst [c & 0xffff] x)
+(MOVLQZX (ANDLconst [c] x)) -> (ANDQconst [c & 0xffffffff] x)
+(MOVBQSX (ANDBconst [c] x)) && c & 0x80 == 0 -> (ANDQconst [c & 0x7f] x)
+(MOVWQSX (ANDWconst [c] x)) && c & 0x8000 == 0 -> (ANDQconst [c & 0x7fff] x)
+(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDQconst [c & 0x7fffffff] x)
+
+// Don't extend before storing
+(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
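Folding an extension into a preceding ANDxconst works because masking commutes with widening: once the low byte has been ANDed with c, zero-extending adds nothing that a single 64-bit AND with c&0xff does not already guarantee; the signed variants additionally need c's sign bit clear so no ones get smeared into the upper bits. As checkable arithmetic in Go (illustrative):

    // zext8AfterMask shows the identity behind
    // (MOVBQZX (ANDBconst [c] x)) -> (ANDQconst [c & 0xff] x).
    func zext8AfterMask(x, c uint64) uint64 {
        byteForm := uint64(uint8(x) & uint8(c)) // ANDBconst, then MOVBQZX
        wideForm := x & (c & 0xff)              // single ANDQconst
        if byteForm != wideForm {
            panic("never happens: the two forms agree for all inputs")
        }
        return wideForm
    }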
+
+// fold constants into memory operations
+// Note that this is not always a good idea because if not all the uses of
+// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
+// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
+// Nevertheless, let's do it!
+(MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] {sym} ptr mem)
+(MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVLload [addOff(off1, off2)] {sym} ptr mem)
+(MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVWload [addOff(off1, off2)] {sym} ptr mem)
+(MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVBload [addOff(off1, off2)] {sym} ptr mem)
+(MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVSSload [addOff(off1, off2)] {sym} ptr mem)
+(MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVSDload [addOff(off1, off2)] {sym} ptr mem)
+(MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem) -> (MOVOload [addOff(off1, off2)] {sym} ptr mem)
+
+(MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVLstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVWstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVBstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVSSstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVSDstore [addOff(off1, off2)] {sym} ptr val mem)
+(MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) -> (MOVOstore [addOff(off1, off2)] {sym} ptr val mem)
+
+// Fold constants into stores.
+(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
+    (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
+(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
+    (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVWconst [c]) mem) && validOff(off) ->
+    (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOVBconst [c]) mem) && validOff(off) ->
+    (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+
+// Fold address offsets into constant stores.
+(MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
+    (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
+    (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
+    (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
+    (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
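MOVxstoreconst ops carry both the constant to store and the address offset in one auxint; ValAndOff packs them as 32-bit halves, which is why validValAndOff/validOff and canAdd guard these folds. A simplified sketch mirroring the helpers in the ssa package's rewrite.go (details elided):

    // ValAndOff packs a 32-bit value (high half) and a 32-bit offset
    // (low half) into a single int64 auxint.
    type ValAndOff int64

    func (x ValAndOff) Val() int64 { return int64(x) >> 32 }
    func (x ValAndOff) Off() int64 { return int64(int32(x)) } // sign-extended

    func makeValAndOff(val, off int64) ValAndOff {
        // callers check validValAndOff first: both halves must fit in 32 bits
        return ValAndOff(val<<32 + int64(uint32(off)))
    }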
+
+// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
+// what variables are being read/written by the ops.
+(MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVQload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVLload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVWload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVBload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVSSload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVSDload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+(MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) ->
+    (MOVOload [addOff(off1,off2)] {mergeSym(sym1,sym2)} base mem)
+
+(MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVQstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVLstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVWstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVBstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVSSstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVSDstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+(MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVOstore [addOff(off1,off2)] {mergeSym(sym1,sym2)} base val mem)
+
+(MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
+    (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+(MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
+    (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+(MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
+    (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+(MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
+    (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+
+// generating indexed loads and stores
+(MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVBloadidx1 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVWloadidx2 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVLloadidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVQloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVSSloadidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVSDloadidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx mem)
+
+(MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVBstoreidx1 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVWstoreidx2 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+(MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVLstoreidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+(MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVQstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+(MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVSSstoreidx4 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+(MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && canMergeSym(sym1, sym2) ->
+    (MOVSDstoreidx8 [addOff(off1, off2)] {mergeSym(sym1,sym2)} ptr idx val mem)
+
+(MOVBload [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx1 [off] {sym} ptr idx mem)
+(MOVBstore [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB -> (MOVBstoreidx1 [off] {sym} ptr idx val mem)
+
+(MOVBstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
+    (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
+(MOVBstoreconst [x] {sym} (ADDQ ptr idx) mem) -> (MOVBstoreconstidx1 [x] {sym} ptr idx mem)
+
+// combine ADDQ into indexed loads and stores
+(MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
+(MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVLloadidx4 [c+d] {sym} ptr idx mem)
+(MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVQloadidx8 [c+d] {sym} ptr idx mem)
+(MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVSSloadidx4 [c+d] {sym} ptr idx mem)
+(MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) -> (MOVSDloadidx8 [c+d] {sym} ptr idx mem)
+
+(MOVBstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
+(MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVLstoreidx4 [c+d] {sym} ptr idx val mem)
+(MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVQstoreidx8 [c+d] {sym} ptr idx val mem)
+(MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem)
+(MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) -> (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem)
+
+(MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVBloadidx1 [c+d] {sym} ptr idx mem)
+(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
+(MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVLloadidx4 [c+4*d] {sym} ptr idx mem)
+(MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVQloadidx8 [c+8*d] {sym} ptr idx mem)
+(MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem)
+(MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) -> (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem)
+
+(MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVBstoreidx1 [c+d] {sym} ptr idx val mem)
+(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
+(MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem)
+(MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem)
+(MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem)
+(MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) -> (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem)
+
+(MOVBstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) ->
+    (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) ->
+    (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) ->
+    (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) ->
+    (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+
+(MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) ->
+    (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
+(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) ->
+    (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
+(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) ->
+    (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
+(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) ->
+    (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
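When the constant addend sits on the index side of a scaled addressing mode, the offset adjustment must be multiplied by the scale, which is where the 2*d, 4*d, 8*d (and 2*c, 4*c, 8*c) above come from. The identity for scale 2, in Go (illustrative):

    // addr2 shows base + 2*(idx+d) + c == base + 2*idx + (c + 2*d);
    // the scale-4 and scale-8 rules are the same with another multiplier.
    func addr2(base, idx, c, d int64) (before, after int64) {
        before = base + 2*(idx+d) + c    // (ADDQconst [d] idx) feeding ...idx2 [c]
        after = base + 2*idx + (c + 2*d) // ...idx2 [c+2*d] on the bare idx
        return // before == after for all inputs
    }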
+
+// fold LEAQs together
+(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && canMergeSym(sym1, sym2) ->
+    (LEAQ [addOff(off1,off2)] {mergeSym(sym1,sym2)} x)
+
+// LEAQ into LEAQ1
+(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
+    (LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+(LEAQ1 [off1] {sym1} x (LEAQ [off2] {sym2} y)) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
+    (LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+
+// LEAQ1 into LEAQ
+(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
+    (LEAQ1 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+
+// LEAQ into LEAQ[248]
+(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
+    (LEAQ2 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
+    (LEAQ4 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
+    (LEAQ8 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+
+// LEAQ[248] into LEAQ
+(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
+    (LEAQ2 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
+    (LEAQ4 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && canMergeSym(sym1, sym2) ->
+    (LEAQ8 [addOff(off1,off2)] {mergeSym(sym1,sym2)} x y)
+
+// lower Zero instructions with word sizes
+(Zero [0] _ mem) -> mem
+(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
+(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
+(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
+(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)
+
+(Zero [3] destptr mem) ->
+    (MOVBstoreconst [makeValAndOff(0,2)] destptr
+        (MOVWstoreconst [0] destptr mem))
+(Zero [5] destptr mem) ->
+    (MOVBstoreconst [makeValAndOff(0,4)] destptr
+        (MOVLstoreconst [0] destptr mem))
+(Zero [6] destptr mem) ->
+    (MOVWstoreconst [makeValAndOff(0,4)] destptr
+        (MOVLstoreconst [0] destptr mem))
+(Zero [7] destptr mem) ->
+    (MOVLstoreconst [makeValAndOff(0,3)] destptr
+        (MOVLstoreconst [0] destptr mem))
+
+// Strip off any fractional word zeroing.
+(Zero [size] destptr mem) && size%8 != 0 && size > 8 ->
+    (Zero [size-size%8] (ADDQconst destptr [size%8])
+        (MOVQstoreconst [0] destptr mem))
+
+// Zero small numbers of words directly.
+(Zero [16] destptr mem) ->
+    (MOVQstoreconst [makeValAndOff(0,8)] destptr
+        (MOVQstoreconst [0] destptr mem))
+(Zero [24] destptr mem) ->
+    (MOVQstoreconst [makeValAndOff(0,16)] destptr
+        (MOVQstoreconst [makeValAndOff(0,8)] destptr
+            (MOVQstoreconst [0] destptr mem)))
+(Zero [32] destptr mem) ->
+    (MOVQstoreconst [makeValAndOff(0,24)] destptr
+        (MOVQstoreconst [makeValAndOff(0,16)] destptr
+            (MOVQstoreconst [makeValAndOff(0,8)] destptr
+                (MOVQstoreconst [0] destptr mem))))
+
+// Medium zeroing uses a duff device.
+(Zero [size] destptr mem) && size <= 1024 && size%8 == 0 && size%16 != 0 ->
+    (Zero [size-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem))
+(Zero [size] destptr mem) && size <= 1024 && size%16 == 0 ->
+    (DUFFZERO [duffStart(size)] (ADDQconst [duffAdj(size)] destptr) (MOVOconst [0]) mem)
+
+// Large zeroing uses REP STOSQ.
+(Zero [size] destptr mem) && size > 1024 && size%8 == 0 ->
+    (REPSTOSQ destptr (MOVQconst [size/8]) (MOVQconst [0]) mem)
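The fractional-word rule above zeroes one full quadword at the front and then restates the problem for the remaining size-size%8 bytes starting size%8 into the buffer; the two stores overlap, which is harmless when everything written is zero. The decomposition as plain Go (illustrative):

    // zero clears size bytes at p the way the rules do: an unaligned
    // remainder is absorbed by one overlapping 8-byte store up front.
    func zero(p []byte, size int) {
        if size > 8 && size%8 != 0 {
            for i := 0; i < 8; i++ { // MOVQstoreconst [0] destptr
                p[i] = 0
            }
            p = p[size%8:] // ADDQconst destptr [size%8]
            size -= size % 8
        }
        for i := 0; i < size; i++ { // now a multiple of 8: small stores,
            p[i] = 0 // a duff device, or REP STOSQ, depending on size
        }
    }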
+
+// Absorb InvertFlags into branches.
+(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
+(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
+(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
+(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
+(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
+(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
+(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
+(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
+(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
+(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
+
+// Constant comparisons.
+(CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
+(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
+(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
+(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
+(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
+(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
+(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
+(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
+(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
+(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
+(CMPWconst (MOVWconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
+(CMPWconst (MOVWconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
+(CMPWconst (MOVWconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
+(CMPWconst (MOVWconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
+(CMPWconst (MOVWconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
+(CMPBconst (MOVBconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
+(CMPBconst (MOVBconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
+(CMPBconst (MOVBconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
+(CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
+(CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
+
+// Other known comparisons.
+(CMPQconst (ANDQconst _ [m]) [n]) && m+1==n && isPowerOfTwo(n) -> (FlagLT_ULT)
+(CMPLconst (ANDLconst _ [m]) [n]) && int32(m)+1==int32(n) && isPowerOfTwo(int64(int32(n))) -> (FlagLT_ULT)
+(CMPWconst (ANDWconst _ [m]) [n]) && int16(m)+1==int16(n) && isPowerOfTwo(int64(int16(n))) -> (FlagLT_ULT)
+(CMPBconst (ANDBconst _ [m]) [n]) && int8(m)+1==int8(n) && isPowerOfTwo(int64(int8(n))) -> (FlagLT_ULT)
+// TODO: DIVxU also.
+
+// Absorb flag constants into SBB ops.
+(SBBQcarrymask (FlagEQ)) -> (MOVQconst [0])
+(SBBQcarrymask (FlagLT_ULT)) -> (MOVQconst [-1])
+(SBBQcarrymask (FlagLT_UGT)) -> (MOVQconst [0])
+(SBBQcarrymask (FlagGT_ULT)) -> (MOVQconst [-1])
+(SBBQcarrymask (FlagGT_UGT)) -> (MOVQconst [0])
+(SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
+(SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
+(SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
+(SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
+(SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])
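A statically decidable comparison folds to one of five flag constants rather than a boolean because the flags encode the signed and the unsigned ordering at once, and the two can disagree. For example, in Go:

    // Comparing -1 with 1 is less-than signed but above unsigned: the case
    // (CMPQconst (MOVQconst [-1]) [1]) -> (FlagLT_UGT) covers.
    func orderings(x, y int64) (signedLess, unsignedLess bool) {
        return x < y, uint64(x) < uint64(y) // orderings(-1, 1) == (true, false)
    }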
+
+// Absorb flag constants into branches.
+(EQ (FlagEQ) yes no) -> (First nil yes no)
+(EQ (FlagLT_ULT) yes no) -> (First nil no yes)
+(EQ (FlagLT_UGT) yes no) -> (First nil no yes)
+(EQ (FlagGT_ULT) yes no) -> (First nil no yes)
+(EQ (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(NE (FlagEQ) yes no) -> (First nil no yes)
+(NE (FlagLT_ULT) yes no) -> (First nil yes no)
+(NE (FlagLT_UGT) yes no) -> (First nil yes no)
+(NE (FlagGT_ULT) yes no) -> (First nil yes no)
+(NE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(LT (FlagEQ) yes no) -> (First nil no yes)
+(LT (FlagLT_ULT) yes no) -> (First nil yes no)
+(LT (FlagLT_UGT) yes no) -> (First nil yes no)
+(LT (FlagGT_ULT) yes no) -> (First nil no yes)
+(LT (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(LE (FlagEQ) yes no) -> (First nil yes no)
+(LE (FlagLT_ULT) yes no) -> (First nil yes no)
+(LE (FlagLT_UGT) yes no) -> (First nil yes no)
+(LE (FlagGT_ULT) yes no) -> (First nil no yes)
+(LE (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(GT (FlagEQ) yes no) -> (First nil no yes)
+(GT (FlagLT_ULT) yes no) -> (First nil no yes)
+(GT (FlagLT_UGT) yes no) -> (First nil no yes)
+(GT (FlagGT_ULT) yes no) -> (First nil yes no)
+(GT (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(GE (FlagEQ) yes no) -> (First nil yes no)
+(GE (FlagLT_ULT) yes no) -> (First nil no yes)
+(GE (FlagLT_UGT) yes no) -> (First nil no yes)
+(GE (FlagGT_ULT) yes no) -> (First nil yes no)
+(GE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(ULT (FlagEQ) yes no) -> (First nil no yes)
+(ULT (FlagLT_ULT) yes no) -> (First nil yes no)
+(ULT (FlagLT_UGT) yes no) -> (First nil no yes)
+(ULT (FlagGT_ULT) yes no) -> (First nil yes no)
+(ULT (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(ULE (FlagEQ) yes no) -> (First nil yes no)
+(ULE (FlagLT_ULT) yes no) -> (First nil yes no)
+(ULE (FlagLT_UGT) yes no) -> (First nil no yes)
+(ULE (FlagGT_ULT) yes no) -> (First nil yes no)
+(ULE (FlagGT_UGT) yes no) -> (First nil no yes)
+
+(UGT (FlagEQ) yes no) -> (First nil no yes)
+(UGT (FlagLT_ULT) yes no) -> (First nil no yes)
+(UGT (FlagLT_UGT) yes no) -> (First nil yes no)
+(UGT (FlagGT_ULT) yes no) -> (First nil no yes)
+(UGT (FlagGT_UGT) yes no) -> (First nil yes no)
+
+(UGE (FlagEQ) yes no) -> (First nil yes no)
+(UGE (FlagLT_ULT) yes no) -> (First nil no yes)
+(UGE (FlagLT_UGT) yes no) -> (First nil yes no)
+(UGE (FlagGT_ULT) yes no) -> (First nil no yes)
+(UGE (FlagGT_UGT) yes no) -> (First nil yes no)
+
+// Absorb flag constants into SETxx ops.
+(SETEQ (FlagEQ)) -> (MOVBconst [1])
+(SETEQ (FlagLT_ULT)) -> (MOVBconst [0])
+(SETEQ (FlagLT_UGT)) -> (MOVBconst [0])
+(SETEQ (FlagGT_ULT)) -> (MOVBconst [0])
+(SETEQ (FlagGT_UGT)) -> (MOVBconst [0])
+
+(SETNE (FlagEQ)) -> (MOVBconst [0])
+(SETNE (FlagLT_ULT)) -> (MOVBconst [1])
+(SETNE (FlagLT_UGT)) -> (MOVBconst [1])
+(SETNE (FlagGT_ULT)) -> (MOVBconst [1])
+(SETNE (FlagGT_UGT)) -> (MOVBconst [1])
+
+(SETL (FlagEQ)) -> (MOVBconst [0])
+(SETL (FlagLT_ULT)) -> (MOVBconst [1])
+(SETL (FlagLT_UGT)) -> (MOVBconst [1])
+(SETL (FlagGT_ULT)) -> (MOVBconst [0])
+(SETL (FlagGT_UGT)) -> (MOVBconst [0])
+
+(SETLE (FlagEQ)) -> (MOVBconst [1])
+(SETLE (FlagLT_ULT)) -> (MOVBconst [1])
+(SETLE (FlagLT_UGT)) -> (MOVBconst [1])
+(SETLE (FlagGT_ULT)) -> (MOVBconst [0])
+(SETLE (FlagGT_UGT)) -> (MOVBconst [0])
+
+(SETG (FlagEQ)) -> (MOVBconst [0])
+(SETG (FlagLT_ULT)) -> (MOVBconst [0])
+(SETG (FlagLT_UGT)) -> (MOVBconst [0])
+(SETG (FlagGT_ULT)) -> (MOVBconst [1])
+(SETG (FlagGT_UGT)) -> (MOVBconst [1])
+
+(SETGE (FlagEQ)) -> (MOVBconst [1])
+(SETGE (FlagLT_ULT)) -> (MOVBconst [0])
+(SETGE (FlagLT_UGT)) -> (MOVBconst [0])
+(SETGE (FlagGT_ULT)) -> (MOVBconst [1])
+(SETGE (FlagGT_UGT)) -> (MOVBconst [1])
+
+(SETB (FlagEQ)) -> (MOVBconst [0])
+(SETB (FlagLT_ULT)) -> (MOVBconst [1])
+(SETB (FlagLT_UGT)) -> (MOVBconst [0])
+(SETB (FlagGT_ULT)) -> (MOVBconst [1])
+(SETB (FlagGT_UGT)) -> (MOVBconst [0])
+
+(SETBE (FlagEQ)) -> (MOVBconst [1])
+(SETBE (FlagLT_ULT)) -> (MOVBconst [1])
+(SETBE (FlagLT_UGT)) -> (MOVBconst [0])
+(SETBE (FlagGT_ULT)) -> (MOVBconst [1])
+(SETBE (FlagGT_UGT)) -> (MOVBconst [0])
+
+(SETA (FlagEQ)) -> (MOVBconst [0])
+(SETA (FlagLT_ULT)) -> (MOVBconst [0])
+(SETA (FlagLT_UGT)) -> (MOVBconst [1])
+(SETA (FlagGT_ULT)) -> (MOVBconst [0])
+(SETA (FlagGT_UGT)) -> (MOVBconst [1])
+
+(SETAE (FlagEQ)) -> (MOVBconst [1])
+(SETAE (FlagLT_ULT)) -> (MOVBconst [0])
+(SETAE (FlagLT_UGT)) -> (MOVBconst [1])
+(SETAE (FlagGT_ULT)) -> (MOVBconst [0])
+(SETAE (FlagGT_UGT)) -> (MOVBconst [1])
+
+// Remove redundant *const ops
+(ADDQconst [0] x) -> x
+(ADDLconst [c] x) && int32(c)==0 -> x
+(ADDWconst [c] x) && int16(c)==0 -> x
+(ADDBconst [c] x) && int8(c)==0 -> x
+(SUBQconst [0] x) -> x
+(SUBLconst [c] x) && int32(c) == 0 -> x
+(SUBWconst [c] x) && int16(c) == 0 -> x
+(SUBBconst [c] x) && int8(c) == 0 -> x
+(ANDQconst [0] _) -> (MOVQconst [0])
+(ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
+(ANDWconst [c] _) && int16(c)==0 -> (MOVWconst [0])
+(ANDBconst [c] _) && int8(c)==0 -> (MOVBconst [0])
+(ANDQconst [-1] x) -> x
+(ANDLconst [c] x) && int32(c)==-1 -> x
+(ANDWconst [c] x) && int16(c)==-1 -> x
+(ANDBconst [c] x) && int8(c)==-1 -> x
+(ORQconst [0] x) -> x
+(ORLconst [c] x) && int32(c)==0 -> x
+(ORWconst [c] x) && int16(c)==0 -> x
+(ORBconst [c] x) && int8(c)==0 -> x
+(ORQconst [-1] _) -> (MOVQconst [-1])
+(ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
+(ORWconst [c] _) && int16(c)==-1 -> (MOVWconst [-1])
+(ORBconst [c] _) && int8(c)==-1 -> (MOVBconst [-1])
+(XORQconst [0] x) -> x
+(XORLconst [c] x) && int32(c)==0 -> x
+(XORWconst [c] x) && int16(c)==0 -> x
+(XORBconst [c] x) && int8(c)==0 -> x
+
+// generic constant folding
+// TODO: more of this
+(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
+(ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [c+d])
+(ADDWconst [c] (MOVWconst [d])) -> (MOVWconst [c+d])
+(ADDBconst [c] (MOVBconst [d])) -> (MOVBconst [c+d])
+(ADDQconst [c] (ADDQconst [d] x)) -> (ADDQconst [c+d] x)
+(ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [c+d] x)
+(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [c+d] x)
+(ADDBconst [c] (ADDBconst [d] x)) -> (ADDBconst [c+d] x)
+(SUBQconst [c] (MOVQconst [d])) -> (MOVQconst [d-c])
+(SUBLconst [c] (MOVLconst [d])) -> (MOVLconst [d-c])
+(SUBWconst [c] (MOVWconst [d])) -> (MOVWconst [d-c])
+(SUBBconst [c] (MOVBconst [d])) -> (MOVBconst [d-c])
+(SUBQconst [c] (SUBQconst [d] x)) -> (ADDQconst [-c-d] x)
+(SUBLconst [c] (SUBLconst [d] x)) -> (ADDLconst [-c-d] x)
+(SUBWconst [c] (SUBWconst [d] x)) -> (ADDWconst [-c-d] x)
+(SUBBconst [c] (SUBBconst [d] x)) -> (ADDBconst [-c-d] x)
+(SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
+(SARLconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
+(SARWconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
+(SARBconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
+(NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
+(NEGL (MOVLconst [c])) -> (MOVLconst [-c])
+(NEGW (MOVWconst [c])) -> (MOVWconst [-c])
+(NEGB (MOVBconst [c])) -> (MOVBconst [-c])
+(MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
+(MULLconst [c] (MOVLconst [d])) -> (MOVLconst [c*d])
+(MULWconst [c] (MOVWconst [d])) -> (MOVWconst [c*d])
+(MULBconst [c] (MOVBconst [d])) -> (MOVBconst [c*d])
+(ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d])
+(ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
+(ANDWconst [c] (MOVWconst [d])) -> (MOVWconst [c&d])
+(ANDBconst [c] (MOVBconst [d])) -> (MOVBconst [c&d])
+(ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d])
+(ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
+(ORWconst [c] (MOVWconst [d])) -> (MOVWconst [c|d])
+(ORBconst [c] (MOVBconst [d])) -> (MOVBconst [c|d])
+(XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d])
+(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
+(XORWconst [c] (MOVWconst [d])) -> (MOVWconst [c^d])
+(XORBconst [c] (MOVBconst [d])) -> (MOVBconst [c^d])
+(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
+(NOTL (MOVLconst [c])) -> (MOVLconst [^c])
+(NOTW (MOVWconst [c])) -> (MOVWconst [^c])
+(NOTB (MOVBconst [c])) -> (MOVBconst [^c])
+
+// generic simplifications
+// TODO: more of this
+(ADDQ x (NEGQ y)) -> (SUBQ x y)
+(ADDL x (NEGL y)) -> (SUBL x y)
+(ADDW x (NEGW y)) -> (SUBW x y)
+(ADDB x (NEGB y)) -> (SUBB x y)
+(SUBQ x x) -> (MOVQconst [0])
+(SUBL x x) -> (MOVLconst [0])
+(SUBW x x) -> (MOVWconst [0])
+(SUBB x x) -> (MOVBconst [0])
+(ANDQ x x) -> x
+(ANDL x x) -> x
+(ANDW x x) -> x
+(ANDB x x) -> x
+(ORQ x x) -> x
+(ORL x x) -> x
+(ORW x x) -> x
+(ORB x x) -> x
+(XORQ x x) -> (MOVQconst [0])
+(XORL x x) -> (MOVLconst [0])
+(XORW x x) -> (MOVWconst [0])
+(XORB x x) -> (MOVBconst [0])
+
+// checking AND against 0.
+(CMPQconst (ANDQ x y) [0]) -> (TESTQ x y)
+(CMPLconst (ANDL x y) [0]) -> (TESTL x y)
+(CMPWconst (ANDW x y) [0]) -> (TESTW x y)
+(CMPBconst (ANDB x y) [0]) -> (TESTB x y)
+(CMPQconst (ANDQconst [c] x) [0]) -> (TESTQconst [c] x)
+(CMPLconst (ANDLconst [c] x) [0]) -> (TESTLconst [c] x)
+(CMPWconst (ANDWconst [c] x) [0]) -> (TESTWconst [c] x)
+(CMPBconst (ANDBconst [c] x) [0]) -> (TESTBconst [c] x)
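The final group rewrites a compare-with-zero of an AND into TEST, which sets the same flags without needing the AND's result in a register. What the combined pattern computes, in Go (illustrative name):

    // anyCommonBits is the predicate behind
    // (CMPQconst (ANDQ x y) [0]) -> (TESTQ x y).
    func anyCommonBits(x, y uint64) bool {
        return x&y != 0 // compiles to TESTQ x, y plus a flag read
    }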
