aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorerifan01 <eric.fang@arm.com>2018-04-17 07:37:38 +0000
committerCherry Zhang <cherryyz@google.com>2018-04-24 20:21:08 +0000
commitd4e936cfd622de322dc93f69144c68bb4c133e21 (patch)
tree4b4fe7a1b9308d7673ae8447c102cb4ef3b67cdc /src
parent5d4267e4803436e17b87e88b13dc79e528299634 (diff)
downloadgo-d4e936cfd622de322dc93f69144c68bb4c133e21.tar.xz
internal/bytealg: optimize IndexString on arm64
This CL adjusts the order of the branch instructions of the code to make it easier for the LIKELY branch to happen. Benchmarks: name old time/op new time/op delta pkg:strings goos:linux goarch:arm64 IndexHard2-8 2.17ms ± 1% 1.23ms ± 0% -43.34% (p=0.008 n=5+5) CountHard2-8 2.13ms ± 1% 1.21ms ± 2% -43.31% (p=0.008 n=5+5) pkg:bytes goos:linux goarch:arm64 IndexRune/4M-8 661µs ±22% 513µs ± 0% -22.32% (p=0.008 n=5+5) IndexEasy/4M-8 672µs ±23% 513µs ± 0% -23.71% (p=0.016 n=5+4) Change-Id: Ib96f095edf77747edc8a971e79f5c1428e5808ce Reviewed-on: https://go-review.googlesource.com/109015 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/internal/bytealg/index_arm64.s74
1 files changed, 35 insertions, 39 deletions
diff --git a/src/internal/bytealg/index_arm64.s b/src/internal/bytealg/index_arm64.s
index 6c93ef3ce8..02eb658fd0 100644
--- a/src/internal/bytealg/index_arm64.s
+++ b/src/internal/bytealg/index_arm64.s
@@ -41,12 +41,12 @@ len_8:
MOVD (R2), R5
loop_8:
// R6 contains substring for comparison
+ CMP R4, R0
+ BHI not_found
MOVD.P 1(R0), R6
CMP R5, R6
- BEQ found
- CMP R4, R0
- BLS loop_8
- JMP not_found
+ BNE loop_8
+ B found
len_2_7:
TBZ $2, R3, len_2_3
TBZ $1, R3, len_4_5
@@ -57,31 +57,29 @@ len_7:
// 1-byte overlap with R5
MOVWU 3(R2), R6
loop_7:
+ CMP R4, R0
+ BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
- BNE not_equal_7
+ BNE loop_7
MOVWU 2(R0), R3
CMP R6, R3
- BEQ found
-not_equal_7:
- CMP R4, R0
- BLS loop_7
- JMP not_found
+ BNE loop_7
+ B found
len_6:
// R5 and R6 contain 6-byte sep
MOVWU (R2), R5
MOVHU 4(R2), R6
loop_6:
+ CMP R4, R0
+ BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
- BNE not_equal_6
+ BNE loop_6
MOVHU 3(R0), R3
CMP R6, R3
- BEQ found
-not_equal_6:
- CMP R4, R0
- BLS loop_6
- JMP not_found
+ BNE loop_6
+ B found
len_4_5:
TBZ $0, R3, len_4
len_5:
@@ -89,26 +87,25 @@ len_5:
MOVWU (R2), R5
MOVBU 4(R2), R7
loop_5:
+ CMP R4, R0
+ BHI not_found
MOVWU.P 1(R0), R3
CMP R5, R3
- BNE not_equal_5
+ BNE loop_5
MOVBU 3(R0), R3
CMP R7, R3
- BEQ found
-not_equal_5:
- CMP R4, R0
- BLS loop_5
- JMP not_found
+ BNE loop_5
+ B found
len_4:
// R5 contains 4-byte sep
MOVWU (R2), R5
loop_4:
+ CMP R4, R0
+ BHI not_found
MOVWU.P 1(R0), R6
CMP R5, R6
- BEQ found
- CMP R4, R0
- BLS loop_4
- JMP not_found
+ BNE loop_4
+ B found
len_2_3:
TBZ $0, R3, len_2
len_3:
@@ -116,30 +113,29 @@ len_3:
MOVHU (R2), R6
MOVBU 2(R2), R7
loop_3:
+ CMP R4, R0
+ BHI not_found
MOVHU.P 1(R0), R3
CMP R6, R3
- BNE not_equal_3
+ BNE loop_3
MOVBU 1(R0), R3
CMP R7, R3
- BEQ found
-not_equal_3:
- CMP R4, R0
- BLS loop_3
- JMP not_found
+ BNE loop_3
+ B found
len_2:
// R5 contains 2-byte sep
MOVHU (R2), R5
loop_2:
+ CMP R4, R0
+ BHI not_found
MOVHU.P 1(R0), R6
CMP R5, R6
- BEQ found
- CMP R4, R0
- BLS loop_2
-not_found:
- MOVD $-1, R0
- MOVD R0, (R9)
- RET
+ BNE loop_2
found:
SUB R8, R0, R0
MOVD R0, (R9)
RET
+not_found:
+ MOVD $-1, R0
+ MOVD R0, (R9)
+ RET