about | summary | refs | log | tree | commit | diff
path: root/src/runtime/sys_linux_arm64.s
diff options
context:
space:
mode:
author: Meng Zhuo <mengzhuo1203@gmail.com> 2018-03-12 07:32:28 +0000
committer: Ian Lance Taylor <iant@golang.org> 2018-03-27 13:21:27 +0000
commit: ea59ebd3387ab93b826606ea90a4149dad7b4e50 (patch)
tree: 2bb7bbd413c4e31da3b8f5b52286e0f8cab2b5fa /src/runtime/sys_linux_arm64.s
parent: b63b0f2b75c71b9e80e42488aaa7ddf1a5da11ae (diff)
download: go-ea59ebd3387ab93b826606ea90a4149dad7b4e50.tar.xz
runtime: use vDSO for clock_gettime on linux/arm64
Use the __vdso_clock_gettime fast path via the vDSO on linux/arm64 to
speed up nanotime and walltime. This results in the following
performance improvement for time.Now on Cavium ThunderX:

name     old time/op  new time/op  delta
TimeNow  442ns ± 0%   163ns ± 0%   -63.16%  (p=0.000 n=10+10)

Benchmarks of the vDSO and fallback paths:

BenchmarkClockVDSOAndFallbackPaths/vDSO      10000000  166 ns/op
BenchmarkClockVDSOAndFallbackPaths/Fallback   3000000  456 ns/op

Change-Id: I326118c6dff865eaa0569fc45d1fc1ff95cb74f6
Reviewed-on: https://go-review.googlesource.com/99855
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/runtime/sys_linux_arm64.s')
-rw-r--r-- src/runtime/sys_linux_arm64.s 71
1 files changed, 69 insertions, 2 deletions
diff --git a/src/runtime/sys_linux_arm64.s b/src/runtime/sys_linux_arm64.s
index 6954f32aac..8a56ba6bab 100644
--- a/src/runtime/sys_linux_arm64.s
+++ b/src/runtime/sys_linux_arm64.s
@@ -12,6 +12,9 @@
#define AT_FDCWD -100
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+
#define SYS_exit 93
#define SYS_read 63
#define SYS_write 64
@@ -180,23 +183,87 @@ TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
// func walltime() (sec int64, nsec int32)
TEXT runtime·walltime(SB),NOSPLIT,$24-12 // reads CLOCK_REALTIME through vdsoClockgettimeSym when it is non-zero, otherwise through SVC
- MOVW $0, R0 // CLOCK_REALTIME
+ MOVD RSP, R20 // save entry SP in R20 so it can be restored at finish; R20 is unchanged by C code
MOVD RSP, R1 // R1 = stack pointer candidate; once adjusted below it becomes RSP, where sec/nsec are read back from
+
+ MOVD g_m(g), R21 // R21 = m
+
+ // Set vdsoPC and vdsoSP for SIGPROF traceback.
+ MOVD LR, m_vdsoPC(R21) // m.vdsoPC = LR
+ MOVD R20, m_vdsoSP(R21) // m.vdsoSP = entry SP; reset to 0 before returning
+
+ MOVD m_curg(R21), R0 // R0 = m.curg
+ CMP g, R0 // is the current g the same as m.curg?
+ BNE noswitch // not equal: keep R1 = current SP, skip the g0 stack load
+
+ MOVD m_g0(R21), R3 // R3 = m.g0
+ MOVD (g_sched+gobuf_sp)(R3), R1 // R1 = g0's saved SP, so RSP is set to the g0 stack below
+
+noswitch:
+ SUB $16, R1 // make 16 bytes of room: sec is read from offset 0, nsec from offset 8
+ BIC $15, R1 // clear the low 4 bits: 16-byte align for C code
+ MOVD R1, RSP // switch to the prepared stack; R1 still holds the same address
+
+ MOVW $CLOCK_REALTIME, R0 // R0 = clock id
+ MOVD runtime·vdsoClockgettimeSym(SB), R2 // R2 = vdsoClockgettimeSym
+ CBZ R2, fallback // zero symbol: nothing to call, take the syscall path
+ BL (R2) // call through the vDSO pointer
+ B finish // skip the syscall path
+
+fallback:
MOVD $SYS_clock_gettime, R8 // R8 = syscall number
SVC // NOTE(review): R0 is not inspected after either path, so a failure would go unnoticed
+
+finish:
MOVD 0(RSP), R3 // sec
MOVD 8(RSP), R5 // nsec
+
+ MOVD R20, RSP // restore SP
+ MOVD $0, m_vdsoSP(R21) // clear vdsoSP
+
MOVD R3, sec+0(FP) // store the 64-bit sec result
MOVW R5, nsec+8(FP) // store nsec as a 32-bit value (MOVW)
RET
TEXT runtime·nanotime(SB),NOSPLIT,$24-8
- MOVW $1, R0 // CLOCK_MONOTONIC
+ MOVD RSP, R20 // R20 is unchanged by C code
MOVD RSP, R1
+
+ MOVD g_m(g), R21 // R21 = m
+
+ // Set vdsoPC and vdsoSP for SIGPROF traceback.
+ MOVD LR, m_vdsoPC(R21)
+ MOVD R20, m_vdsoSP(R21)
+
+ MOVD m_curg(R21), R0
+ CMP g, R0
+ BNE noswitch
+
+ MOVD m_g0(R21), R3
+ MOVD (g_sched+gobuf_sp)(R3), R1 // Set RSP to g0 stack
+
+noswitch:
+ SUB $16, R1
+ BIC $15, R1
+ MOVD R1, RSP
+
+ MOVW $CLOCK_MONOTONIC, R0
+ MOVD runtime·vdsoClockgettimeSym(SB), R2
+ CBZ R2, fallback
+ BL (R2)
+ B finish
+
+fallback:
MOVD $SYS_clock_gettime, R8
SVC
+
+finish:
MOVD 0(RSP), R3 // sec
MOVD 8(RSP), R5 // nsec
+
+ MOVD R20, RSP // restore SP
+ MOVD $0, m_vdsoSP(R21) // clear vdsoSP
+
// sec is in R3, nsec in R5
// return nsec in R3
MOVD $1000000000, R4