diff options
| author | Vasily Leonenko <vasiliy.leonenko@gmail.com> | 2024-07-23 23:23:33 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2024-08-06 15:36:27 +0000 |
| commit | b915399e7e91cba31d4031df507b9c97c33f35d8 (patch) | |
| tree | 96c6158e2d0535f9ef1baa05937ee0cac8b78d40 /src/internal/bytealg | |
| parent | 1f0c044d60211e435dc58844127544dd3ecb6a41 (diff) | |
| download | go-b915399e7e91cba31d4031df507b9c97c33f35d8.tar.xz | |
internal/bytealg: optimize Equal for arm64 target
Remove redundant intermediate jump in runtime.memequal
Remove redundant a.ptr==b.ptr check in runtime.memequal_varlen
Add 16-byte alignment before some labels in runtime.memequal
```
goos: linux
goarch: arm64
pkg: bytes
                               │ ./master.log │              ./opt.log              │
                               │    sec/op    │    sec/op     vs base               │
Equal/0-4                        0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                    2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                    2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                    2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                   2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                   2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                   2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                   2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                   2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                   2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                  2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                         6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                         8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                         8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                        8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                        9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                        11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                       10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                        192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                        191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                       3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4         7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4         7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4         7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4         7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4       193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4       223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4       191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4       223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4    191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4    225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4    192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4    225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4   3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4   3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4   3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4   3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                           199.9n         190.5n        -4.70%
```
Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/internal/bytealg')
| -rw-r--r-- | src/internal/bytealg/equal_arm64.s | 26 |
1 files changed, 10 insertions, 16 deletions
```diff
diff --git a/src/internal/bytealg/equal_arm64.s b/src/internal/bytealg/equal_arm64.s
index 4db9515474..408ab374e6 100644
--- a/src/internal/bytealg/equal_arm64.s
+++ b/src/internal/bytealg/equal_arm64.s
@@ -5,25 +5,11 @@
 #include "go_asm.h"
 #include "textflag.h"
 
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
-	// short path to handle 0-byte case
-	CBZ	R2, equal
-	// short path to handle equal pointers
-	CMP	R0, R1
-	BEQ	equal
-	B	memeqbody<>(SB)
-equal:
-	MOVD	$1, R0
-	RET
-
 // memequal_varlen(a, b unsafe.Pointer) bool
 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
-	CMP	R0, R1
-	BEQ	eq
 	MOVD	8(R26), R2 // compiler stores size at offset 8 in the closure
 	CBZ	R2, eq
-	B	memeqbody<>(SB)
+	B	runtime·memequal<ABIInternal>(SB)
 eq:
 	MOVD	$1, R0
 	RET
@@ -33,7 +19,13 @@ eq:
 // R1: pointer b
 // R2: data len
 // at return: result in R0
-TEXT memeqbody<>(SB),NOSPLIT,$0
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
+	// short path to handle 0-byte case
+	CBZ	R2, equal
+	// short path to handle equal pointers
+	CMP	R0, R1
+	BEQ	equal
 	CMP	$1, R2
 	// handle 1-byte special case for better performance
 	BEQ	one
@@ -91,6 +83,7 @@ tail:
 	EOR	R4, R5
 	CBNZ	R5, not_equal
 	B	equal
+	PCALIGN	$16
 lt_8:
 	TBZ	$2, R2, lt_4
 	MOVWU	(R0), R4
@@ -103,6 +96,7 @@ lt_8:
 	EOR	R4, R5
 	CBNZ	R5, not_equal
 	B	equal
+	PCALIGN	$16
 lt_4:
 	TBZ	$1, R2, lt_2
 	MOVHU.P	2(R0), R4
```
