From ea59ebd3387ab93b826606ea90a4149dad7b4e50 Mon Sep 17 00:00:00 2001
From: Meng Zhuo
Date: Mon, 12 Mar 2018 07:32:28 +0000
Subject: runtime: use vDSO for clock_gettime on linux/arm64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the __vdso_clock_gettime fast path via the vDSO on linux/arm64 to
speed up nanotime and walltime. This results in the following
performance improvement for time.Now on Cavium ThunderX:

name     old time/op  new time/op  delta
TimeNow   442ns ± 0%   163ns ± 0%  -63.16%  (p=0.000 n=10+10)

And benchmarks on VDSO

BenchmarkClockVDSOAndFallbackPaths/vDSO       10000000  166 ns/op
BenchmarkClockVDSOAndFallbackPaths/Fallback    3000000  456 ns/op

Change-Id: I326118c6dff865eaa0569fc45d1fc1ff95cb74f6
Reviewed-on: https://go-review.googlesource.com/99855
Run-TryBot: Tobias Klauser
Run-TryBot: Ian Lance Taylor
TryBot-Result: Gobot Gobot
Reviewed-by: Ian Lance Taylor
---
 src/runtime/sys_linux_arm64.s | 71 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 2 deletions(-)

(limited to 'src/runtime/sys_linux_arm64.s')

diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index 6954f32aac..8a56ba6bab 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -12,6 +12,9 @@
 
 #define AT_FDCWD -100
 
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+
 #define SYS_exit 93
 #define SYS_read 63
 #define SYS_write 64
@@ -180,23 +183,87 @@ TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
 
 // func walltime() (sec int64, nsec int32)
 TEXT runtime·walltime(SB),NOSPLIT,$24-12
-	MOVW $0, R0 // CLOCK_REALTIME
+	MOVD RSP, R20 // R20 is unchanged by C code
 	MOVD RSP, R1
+
+	MOVD g_m(g), R21 // R21 = m
+
+	// Set vdsoPC and vdsoSP for SIGPROF traceback.
+	MOVD LR, m_vdsoPC(R21)
+	MOVD R20, m_vdsoSP(R21)
+
+	MOVD m_curg(R21), R0
+	CMP g, R0
+	BNE noswitch
+
+	MOVD m_g0(R21), R3
+	MOVD (g_sched+gobuf_sp)(R3), R1 // Set RSP to g0 stack
+
+noswitch:
+	SUB $16, R1
+	BIC $15, R1 // Align for C code
+	MOVD R1, RSP
+
+	MOVW $CLOCK_REALTIME, R0
+	MOVD runtime·vdsoClockgettimeSym(SB), R2
+	CBZ R2, fallback
+	BL (R2)
+	B finish
+
+fallback:
 	MOVD $SYS_clock_gettime, R8
 	SVC
+
+finish:
 	MOVD 0(RSP), R3 // sec
 	MOVD 8(RSP), R5 // nsec
+
+	MOVD R20, RSP // restore SP
+	MOVD $0, m_vdsoSP(R21) // clear vdsoSP
+
 	MOVD R3, sec+0(FP)
 	MOVW R5, nsec+8(FP)
 	RET
 
 TEXT runtime·nanotime(SB),NOSPLIT,$24-8
-	MOVW $1, R0 // CLOCK_MONOTONIC
+	MOVD RSP, R20 // R20 is unchanged by C code
 	MOVD RSP, R1
+
+	MOVD g_m(g), R21 // R21 = m
+
+	// Set vdsoPC and vdsoSP for SIGPROF traceback.
+	MOVD LR, m_vdsoPC(R21)
+	MOVD R20, m_vdsoSP(R21)
+
+	MOVD m_curg(R21), R0
+	CMP g, R0
+	BNE noswitch
+
+	MOVD m_g0(R21), R3
+	MOVD (g_sched+gobuf_sp)(R3), R1 // Set RSP to g0 stack
+
+noswitch:
+	SUB $16, R1
+	BIC $15, R1
+	MOVD R1, RSP
+
+	MOVW $CLOCK_MONOTONIC, R0
+	MOVD runtime·vdsoClockgettimeSym(SB), R2
+	CBZ R2, fallback
+	BL (R2)
+	B finish
+
+fallback:
 	MOVD $SYS_clock_gettime, R8
 	SVC
+
+finish:
 	MOVD 0(RSP), R3 // sec
 	MOVD 8(RSP), R5 // nsec
+
+	MOVD R20, RSP // restore SP
+	MOVD $0, m_vdsoSP(R21) // clear vdsoSP
+
 	// sec is in R3, nsec in R5
 	// return nsec in R3
 	MOVD $1000000000, R4
-- 
cgit v1.3