diff options
| author | erifan01 <eric.fang@arm.com> | 2019-01-22 09:10:59 +0000 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2019-03-03 20:20:10 +0000 |
| commit | 159b2de4428d47e422ffd696e0f0c62353c4bb79 (patch) | |
| tree | 44466885e08fc5fa580fab71b5b05741134440e0 /src | |
| parent | 83610c90bbe4f5f0b18ac01da3f3921c2f7090e4 (diff) | |
| download | go-159b2de4428d47e422ffd696e0f0c62353c4bb79.tar.xz | |
cmd/compile: optimize math/bits.Div32 for arm64
Benchmark:
name old time/op new time/op delta
Div-8 22.0ns ± 0% 22.0ns ± 0% ~ (all equal)
Div32-8 6.51ns ± 0% 3.00ns ± 0% -53.90% (p=0.000 n=10+8)
Div64-8 22.5ns ± 0% 22.5ns ± 0% ~ (all equal)
Code:
func div32(hi, lo, y uint32) (q, r uint32) {return bits.Div32(hi, lo, y)}
Before:
0x0020 00032 (test.go:24) MOVWU "".y+8(FP), R0
0x0024 00036 ($GOROOT/src/math/bits/bits.go:472) CBZW R0, 132
0x0028 00040 ($GOROOT/src/math/bits/bits.go:472) MOVWU "".hi(FP), R1
0x002c 00044 ($GOROOT/src/math/bits/bits.go:472) CMPW R1, R0
0x0030 00048 ($GOROOT/src/math/bits/bits.go:472) BLS 96
0x0034 00052 ($GOROOT/src/math/bits/bits.go:475) MOVWU "".lo+4(FP), R2
0x0038 00056 ($GOROOT/src/math/bits/bits.go:475) ORR R1<<32, R2, R1
0x003c 00060 ($GOROOT/src/math/bits/bits.go:476) CBZ R0, 140
0x0040 00064 ($GOROOT/src/math/bits/bits.go:476) UDIV R0, R1, R2
0x0044 00068 (test.go:24) MOVW R2, "".q+16(FP)
0x0048 00072 ($GOROOT/src/math/bits/bits.go:476) UREM R0, R1, R0
0x0050 00080 (test.go:24) MOVW R0, "".r+20(FP)
0x0054 00084 (test.go:24) MOVD -8(RSP), R29
0x0058 00088 (test.go:24) MOVD.P 32(RSP), R30
0x005c 00092 (test.go:24) RET (R30)
After:
0x001c 00028 (test.go:24) MOVWU "".y+8(FP), R0
0x0020 00032 (test.go:24) CBZW R0, 92
0x0024 00036 (test.go:24) MOVWU "".hi(FP), R1
0x0028 00040 (test.go:24) CMPW R0, R1
0x002c 00044 (test.go:24) BHS 84
0x0030 00048 (test.go:24) MOVWU "".lo+4(FP), R2
0x0034 00052 (test.go:24) ORR R1<<32, R2, R4
0x0038 00056 (test.go:24) UDIV R0, R4, R3
0x003c 00060 (test.go:24) MSUB R3, R4, R0, R4
0x0040 00064 (test.go:24) MOVW R3, "".q+16(FP)
0x0044 00068 (test.go:24) MOVW R4, "".r+20(FP)
0x0048 00072 (test.go:24) MOVD -8(RSP), R29
0x004c 00076 (test.go:24) MOVD.P 16(RSP), R30
0x0050 00080 (test.go:24) RET (R30)
UREM instruction in the previous assembly code will be converted to UDIV and MSUB instructions
on arm64. However the UDIV instruction in UREM is unnecessary, because it's a duplicate of the
previous UDIV. This CL adds a rule to have this extra UDIV instruction removed by CSE.
Change-Id: Ie2508784320020b2de022806d09f75a7871bb3d7
Reviewed-on: https://go-review.googlesource.com/c/159577
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Bryan C. Mills <bcmills@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src')
| -rw-r--r-- | src/cmd/compile/internal/ssa/gen/ARM64.rules | 7 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewriteARM64.go | 48 |
2 files changed, 55 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 3f49a9bcf9..133a893610 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -133,6 +133,13 @@ (BitRev16 x) -> (SRLconst [48] (RBIT <typ.UInt64> x)) (BitRev8 x) -> (SRLconst [56] (RBIT <typ.UInt64> x)) +// In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into +// UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or +// after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant. +// The purpose of this rule is to have this extra UDIV instruction removed in CSE pass. +(UMOD <typ.UInt64> x y) -> (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y)) +(UMODW <typ.UInt32> x y) -> (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y)) + // boolean ops -- booleans are represented with 0=false, 1=true (AndB x y) -> (AND x y) (OrB x y) -> (OR x y) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index fe815efb14..45801a4003 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -30667,6 +30667,30 @@ func rewriteValueARM64_OpARM64UDIVW_0(v *Value) bool { return false } func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ + // match: (UMOD <typ.UInt64> x y) + // cond: + // result: (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y)) + for { + if v.Type != typ.UInt64 { + break + } + _ = v.Args[1] + x := v.Args[0] + y := v.Args[1] + v.reset(OpARM64MSUB) + v.Type = typ.UInt64 + v.AddArg(x) + v.AddArg(y) + v0 := b.NewValue0(v.Pos, OpARM64UDIV, typ.UInt64) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } // match: (UMOD _ (MOVDconst [1])) // cond: // result: (MOVDconst [0]) @@ -30724,6 +30748,30 @@ func rewriteValueARM64_OpARM64UMOD_0(v *Value) bool { return false } func rewriteValueARM64_OpARM64UMODW_0(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ + // match: (UMODW <typ.UInt32> x y) + // cond: + // result: (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y)) + for { + if v.Type != typ.UInt32 { + break + } + _ = v.Args[1] + x := v.Args[0] + y := v.Args[1] + v.reset(OpARM64MSUBW) + v.Type = typ.UInt32 + v.AddArg(x) + v.AddArg(y) + v0 := b.NewValue0(v.Pos, OpARM64UDIVW, typ.UInt32) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } // match: (UMODW _ (MOVDconst [c])) // cond: uint32(c)==1 // result: (MOVDconst [0]) |
