From af40cbe83c451176a1576ff5ce5755c3dc119f45 Mon Sep 17 00:00:00 2001 From: Frank Somers Date: Tue, 10 Oct 2017 22:50:01 +0100 Subject: runtime: use vDSO on linux/386 to improve time.Now performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds support for accelerating time.Now by using the __vdso_clock_gettime fast-path via the vDSO on linux/386 if it is available. When the vDSO path to the clocks is available, it is typically 5x-10x faster than the syscall path (see benchmark extract below). Two such calls are made for each time.Now() call on most platforms as of go 1.9. - Add vdso_linux_386.go, containing the ELF32 definitions for use by vdso_linux.go, the maximum array size, and the symbols to be located in the vDSO. - Modify runtime.walltime and runtime.nanotime to check for and use the vDSO fast-path if available, or fall back to the existing syscall path. - Reduce the stack reservations for runtime.walltime and runtime.nanotime from 32 to 16 bytes. It appears the syscall path actually only needed 8 bytes, but 16 is now needed to cover the syscall and vDSO paths. - Remove clearing DX from the syscall paths as clock_gettime only takes 2 args (BX, CX in syscall calling convention), so there should be no need to clear DX. The included BenchmarkTimeNow was run with -cpu=1 -count=20 on an "Intel(R) Celeron(R) CPU J1900 @ 1.99GHz", comparing released go 1.9.1 vs this change. This shows a gain in performance on linux/386 (6.89x), and that no regression occurred on linux/amd64 due to this change. Kernel: linux/i686, GOOS=linux GOARCH=386 name old time/op new time/op delta TimeNow 978ns ± 0% 142ns ± 0% -85.48% (p=0.000 n=16+20) Kernel: linux/x86_64, GOOS=linux GOARCH=amd64 name old time/op new time/op delta TimeNow 125ns ± 0% 125ns ± 0% ~ (all equal) Gains are more dramatic in virtualized environments, presumably due to the overhead of virtualizing the syscall. 
Fixes #22190 Change-Id: I2f83ce60cb1b8b310c9ced0706bb463c1b3aedf8 Reviewed-on: https://go-review.googlesource.com/69390 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/sys_linux_386.s | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'src/runtime/sys_linux_386.s') diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s index 79985070f1..722d2ab2d3 100644 --- a/src/runtime/sys_linux_386.s +++ b/src/runtime/sys_linux_386.s @@ -203,12 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16 RET // func walltime() (sec int64, nsec int32) -TEXT runtime·walltime(SB), NOSPLIT, $32 +TEXT runtime·walltime(SB), NOSPLIT, $16 + // Stack layout, depending on call path: + // x(SP) vDSO INVOKE_SYSCALL + // 12 ts.tv_nsec ts.tv_nsec + // 8 ts.tv_sec ts.tv_sec + // 4 &ts - + // 0 CLOCK_ - + // + // If we take the vDSO path, we're calling a function with gcc calling convention. + // We're guaranteed 128 bytes on entry. We've taken 16, and the call uses another 4, + // leaving 108 for __vdso_clock_gettime to use. + MOVL runtime·__vdso_clock_gettime_sym(SB), AX + CMPL AX, $0 + JEQ fallback + + LEAL 8(SP), BX // &ts (struct timespec) + MOVL BX, 4(SP) + MOVL $0, 0(SP) // CLOCK_REALTIME + CALL AX + JMP finish + +fallback: MOVL $SYS_clock_gettime, AX MOVL $0, BX // CLOCK_REALTIME LEAL 8(SP), CX - MOVL $0, DX INVOKE_SYSCALL + +finish: MOVL 8(SP), AX // sec MOVL 12(SP), BX // nsec @@ -220,12 +242,25 @@ TEXT runtime·walltime(SB), NOSPLIT, $32 // int64 nanotime(void) so really // void nanotime(int64 *nsec) -TEXT runtime·nanotime(SB), NOSPLIT, $32 +TEXT runtime·nanotime(SB), NOSPLIT, $16 + // See comments above in walltime() about stack space usage and layout. 
+ MOVL runtime·__vdso_clock_gettime_sym(SB), AX + CMPL AX, $0 + JEQ fallback + + LEAL 8(SP), BX // &ts (struct timespec) + MOVL BX, 4(SP) + MOVL $1, 0(SP) // CLOCK_MONOTONIC + CALL AX + JMP finish + +fallback: MOVL $SYS_clock_gettime, AX MOVL $1, BX // CLOCK_MONOTONIC LEAL 8(SP), CX - MOVL $0, DX INVOKE_SYSCALL + +finish: MOVL 8(SP), AX // sec MOVL 12(SP), BX // nsec -- cgit v1.3-6-g1900