From 4d180f71dca041a39b50b9dec8a0ebec5cc02b90 Mon Sep 17 00:00:00 2001 From: ruinan Date: Thu, 18 Aug 2022 01:31:57 +0000 Subject: cmd/compile: omit redundant sign/unsign extension on arm64 On Arm64, all 32-bit instructions will ignore the upper 32 bits and clear them to zero for the result. No need to do an unsign extend before a 32 bit op. This CL removes the redundant unsign extension only for the existing 32-bit opcodes, and also omits the sign extension when the upper bit of the result can be predicted. Fixes #42162 Change-Id: I61e6670bfb8982572430e67a4fa61134a3ea240a CustomizedGitHooks: yes Reviewed-on: https://go-review.googlesource.com/c/go/+/427454 Reviewed-by: Keith Randall Auto-Submit: Eric Fang Reviewed-by: Keith Randall Run-TryBot: Eric Fang Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot --- src/cmd/compile/internal/ssa/_gen/ARM64.rules | 10 ++++ src/cmd/compile/internal/ssa/_gen/ARM64Ops.go | 1 + src/cmd/compile/internal/ssa/rewrite.go | 4 ++ src/cmd/compile/internal/ssa/rewriteARM64.go | 68 +++++++++++++++++++++++++++ 4 files changed, 83 insertions(+) (limited to 'src') diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index cf43542615..34ee907e3a 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -1665,6 +1665,16 @@ // zero upper bit of the register; no need to zero-extend (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x) +// omit unsign extension + +(MOVWUreg x) && zeroUpper32Bits(x, 3) => x + +// omit sign extension + +(MOVWreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst x [c]) +(MOVHreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst x [c]) +(MOVBreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffffff80) == 0 => (ANDconst x [c]) + // absorb flag constants into conditional instructions (CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x (CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go index 2a9c2ae486..ca5b929ad7 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go @@ -13,6 +13,7 @@ import "strings" // - *const instructions may use a constant larger than the instruction can encode. // In this case the assembler expands to multiple instructions and uses tmp // register (R27). +// - All 32-bit Ops will zero the upper 32 bits of the destination register. // Suffixes encode the bit width of various instructions. // D (double word) = 64 bit diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index c56447d336..54ea2d3f4f 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1298,6 +1298,10 @@ func zeroUpper32Bits(x *Value, depth int) bool { OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst, OpAMD64SHLL, OpAMD64SHLLconst: return true + case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst, + OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW, + OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst: + return true case OpArg: return x.Type.Size() == 4 case OpPhi, OpSelect0, OpSelect1: diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 7cc7a2a424..a43a366c67 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -8271,6 +8271,25 @@ func rewriteValueARM64_OpARM64MOVBreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(int8(c))) return true } + // match: (MOVBreg (ANDconst x [c])) + // cond: uint64(c) & uint64(0xffffffffffffff80) == 0 + // result: (ANDconst x [c]) + for { + t := v.Type + if v_0.Op != OpARM64ANDconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(uint64(c)&uint64(0xffffffffffffff80) == 0) { + break + } + v.reset(OpARM64ANDconst) + v.Type = t + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } // match: (MOVBreg (SLLconst [lc] x)) // cond: lc < 8 // result: (SBFIZ [armBFAuxInt(lc, 8-lc)] x) @@ -11991,6 +12010,25 @@ func rewriteValueARM64_OpARM64MOVHreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(int16(c))) return true } + // match: (MOVHreg (ANDconst x [c])) + // cond: uint64(c) & uint64(0xffffffffffff8000) == 0 + // result: (ANDconst x [c]) + for { + t := v.Type + if v_0.Op != OpARM64ANDconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(uint64(c)&uint64(0xffffffffffff8000) == 0) { + break + } + v.reset(OpARM64ANDconst) + v.Type = t + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } // match: (MOVHreg (SLLconst [lc] x)) // cond: lc < 16 // result: (SBFIZ [armBFAuxInt(lc, 16-lc)] x) @@ -13687,6 +13725,17 @@ func rewriteValueARM64_OpARM64MOVWUreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(uint32(c))) return true } + // match: (MOVWUreg x) + // cond: zeroUpper32Bits(x, 3) + // result: x + for { + x := v_0 + if !(zeroUpper32Bits(x, 3)) { + break + } + v.copyOf(x) + return true + } // match: (MOVWUreg (SLLconst [lc] x)) // cond: lc >= 32 // result: (MOVDconst [0]) @@ -14189,6 +14238,25 @@ func rewriteValueARM64_OpARM64MOVWreg(v *Value) bool { v.AuxInt = int64ToAuxInt(int64(int32(c))) return true } + // match: (MOVWreg (ANDconst x [c])) + // cond: uint64(c) & uint64(0xffffffff80000000) == 0 + // result: (ANDconst x [c]) + for { + t := v.Type + if v_0.Op != OpARM64ANDconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(uint64(c)&uint64(0xffffffff80000000) == 0) { + break + } + v.reset(OpARM64ANDconst) + v.Type = t + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } // match: (MOVWreg (SLLconst [lc] x)) // cond: lc < 32 // result: (SBFIZ [armBFAuxInt(lc, 32-lc)] x) -- cgit v1.3