aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorruinan <ruinan.sun@arm.com>2022-08-08 04:17:19 +0000
committerGopher Robot <gobot@golang.org>2022-09-02 17:44:29 +0000
commit54c7bc9cff748e6554e53fbbbf823fdd214d0482 (patch)
tree8520677a7dc976ac69c6c28b213f57c831be27d5
parent5befb24bb5cbd8ae6210b4d6a88a4437eec6fb0b (diff)
downloadgo-54c7bc9cff748e6554e53fbbbf823fdd214d0482.tar.xz
cmd/compile: optimize shift ops on arm64 when the shift value is v&63
For the following code case: var x uint64 x >> (shift & 63) We can directly genereta `x >> shift` on arm64, since the hardware will only use the bottom 6 bits of the shift amount. Benchmark old time/op new time/op delta ShiftArithmeticRight-8 0.40ns 0.31ns -21.7% Change-Id: Id58c8a5b2f6dd5c30c3876f4a36e11b4d81e2dc9 Reviewed-on: https://go-review.googlesource.com/c/go/+/425294 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Auto-Submit: Keith Randall <khr@golang.org> Run-TryBot: Keith Randall <khr@golang.org> Reviewed-by: Heschi Kreinick <heschi@google.com>
-rw-r--r--src/cmd/compile/internal/ssa/gen/ARM64.rules5
-rw-r--r--src/cmd/compile/internal/ssa/rewriteARM64.go36
-rw-r--r--test/codegen/shift.go9
3 files changed, 49 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 0e1a36015a..a91f17a2d3 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -1235,9 +1235,12 @@
(EON x (MOVDconst [c])) => (XORconst [^c] x)
(ORN x (MOVDconst [c])) => (ORconst [^c] x)
-(SLL x (MOVDconst [c])) => (SLLconst x [c&63]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=64)
+(SLL x (MOVDconst [c])) => (SLLconst x [c&63])
(SRL x (MOVDconst [c])) => (SRLconst x [c&63])
(SRA x (MOVDconst [c])) => (SRAconst x [c&63])
+(SLL x (ANDconst [63] y)) => (SLL x y)
+(SRL x (ANDconst [63] y)) => (SRL x y)
+(SRA x (ANDconst [63] y)) => (SRA x y)
(CMP x (MOVDconst [c])) => (CMPconst [c] x)
(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 002659f92a..ecb8a6b779 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -20484,6 +20484,18 @@ func rewriteValueARM64_OpARM64SLL(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (SLL x (ANDconst [63] y))
+ // result: (SLL x y)
+ for {
+ x := v_0
+ if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
+ break
+ }
+ y := v_1.Args[0]
+ v.reset(OpARM64SLL)
+ v.AddArg2(x, y)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64SLLconst(v *Value) bool {
@@ -20649,6 +20661,18 @@ func rewriteValueARM64_OpARM64SRA(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (SRA x (ANDconst [63] y))
+ // result: (SRA x y)
+ for {
+ x := v_0
+ if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
+ break
+ }
+ y := v_1.Args[0]
+ v.reset(OpARM64SRA)
+ v.AddArg2(x, y)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64SRAconst(v *Value) bool {
@@ -20806,6 +20830,18 @@ func rewriteValueARM64_OpARM64SRL(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (SRL x (ANDconst [63] y))
+ // result: (SRL x y)
+ for {
+ x := v_0
+ if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
+ break
+ }
+ y := v_1.Args[0]
+ v.reset(OpARM64SRL)
+ v.AddArg2(x, y)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64SRLconst(v *Value) bool {
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 293924a3db..f4cfea3f82 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -82,6 +82,7 @@ func lshMask64x64(v int64, s uint64) int64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SLL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSL",-"AND"
return v << (s & 63)
}
@@ -90,6 +91,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SRL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSR",-"AND"
return v >> (s & 63)
}
@@ -98,6 +100,7 @@ func rshMask64x64(v int64, s uint64) int64 {
// ppc64le:"ANDCC",-ORN",-"ISEL"
// riscv64:"SRA",-"OR",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"ASR",-"AND"
return v >> (s & 63)
}
@@ -106,6 +109,7 @@ func lshMask32x64(v int32, s uint64) int32 {
// ppc64le:"ISEL",-"ORN"
// riscv64:"SLL","AND","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSL",-"AND"
return v << (s & 63)
}
@@ -114,6 +118,7 @@ func rshMask32Ux64(v uint32, s uint64) uint32 {
// ppc64le:"ISEL",-"ORN"
// riscv64:"SRL","AND","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSR",-"AND"
return v >> (s & 63)
}
@@ -122,6 +127,7 @@ func rshMask32x64(v int32, s uint64) int32 {
// ppc64le:"ISEL",-"ORN"
// riscv64:"SRA","OR","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"ASR",-"AND"
return v >> (s & 63)
}
@@ -130,6 +136,7 @@ func lshMask64x32(v int64, s uint32) int64 {
// ppc64le:"ANDCC",-"ORN"
// riscv64:"SLL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSL",-"AND"
return v << (s & 63)
}
@@ -138,6 +145,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 {
// ppc64le:"ANDCC",-"ORN"
// riscv64:"SRL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"LSR",-"AND"
return v >> (s & 63)
}
@@ -146,6 +154,7 @@ func rshMask64x32(v int64, s uint32) int64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SRA",-"OR",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // arm64:"ASR",-"AND"
return v >> (s & 63)
}