aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/sys_linux_386.s
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2017-11-10 09:48:39 -0800
committerIan Lance Taylor <iant@golang.org>2017-11-14 23:51:19 +0000
commita158382b1c9c0b95a7d41865a405736be6bc585f (patch)
tree6ff6cf9f875fe98d594e03ee9fa814fc214dad04 /src/runtime/sys_linux_386.s
parentfcee1897767c0cfa6e13a843fe5ee5d1deb8081b (diff)
downloadgo-a158382b1c9c0b95a7d41865a405736be6bc585f.tar.xz
runtime: call amd64 VDSO entry points on large stack
If the Linux kernel was built with CONFIG_OPTIMIZE_INLINING=n and was built with hardening options turned on, GCC will insert a stack probe in the VDSO function that requires a full page of stack space. The stack probe can corrupt memory if another thread is using it. Avoid sporadic crashes by calling the VDSO on the g0 or gsignal stack. While we're at it, align the stack as C code expects. We've been getting away with a misaligned stack, but it's possible that the VDSO code will change in the future to break that assumption. Benchmarks show a 11% hit on time.Now, but it's only 6ns. name old time/op new time/op delta AfterFunc-12 1.66ms ± 0% 1.66ms ± 1% ~ (p=0.905 n=9+10) After-12 1.90ms ± 6% 1.86ms ± 0% -2.05% (p=0.012 n=10+8) Stop-12 113µs ± 3% 115µs ± 2% +1.60% (p=0.017 n=9+10) SimultaneousAfterFunc-12 145µs ± 1% 144µs ± 0% -0.68% (p=0.002 n=10+8) StartStop-12 39.5µs ± 3% 40.4µs ± 5% +2.19% (p=0.023 n=10+10) Reset-12 10.2µs ± 0% 10.4µs ± 0% +2.45% (p=0.000 n=10+9) Sleep-12 190µs ± 1% 190µs ± 1% ~ (p=0.971 n=10+10) Ticker-12 4.68ms ± 2% 4.64ms ± 2% -0.83% (p=0.043 n=9+10) Now-12 48.4ns ±11% 54.0ns ±11% +11.42% (p=0.017 n=10+10) NowUnixNano-12 48.5ns ±13% 56.9ns ± 8% +17.30% (p=0.000 n=10+10) Format-12 489ns ±11% 504ns ± 6% ~ (p=0.289 n=10+10) FormatNow-12 436ns ±23% 480ns ±13% +10.25% (p=0.026 n=9+10) MarshalJSON-12 656ns ±14% 587ns ±24% ~ (p=0.063 n=10+10) MarshalText-12 647ns ± 7% 638ns ± 9% ~ (p=0.516 n=10+10) Parse-12 348ns ± 8% 328ns ± 9% -5.66% (p=0.030 n=10+10) ParseDuration-12 136ns ± 9% 140ns ±11% ~ (p=0.425 n=10+10) Hour-12 14.8ns ± 6% 15.6ns ±11% ~ (p=0.085 n=10+10) Second-12 14.0ns ± 6% 14.3ns ±12% ~ (p=0.443 n=10+10) Year-12 32.4ns ±11% 33.4ns ± 6% ~ (p=0.492 n=10+10) Day-12 41.5ns ± 9% 42.3ns ±12% ~ (p=0.239 n=10+10) Fixes #20427 Change-Id: Ia395cbb863215f4499b8e7ef95f4b99f51090911 Reviewed-on: https://go-review.googlesource.com/76990 Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime/sys_linux_386.s')
-rw-r--r--src/runtime/sys_linux_386.s53
1 files changed, 46 insertions, 7 deletions
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index abed125f14..bc3b8dbb1c 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -203,17 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16
RET
// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB), NOSPLIT, $16
+TEXT runtime·walltime(SB), NOSPLIT, $0-12
+ // We don't know how much stack space the VDSO code will need,
+ // so switch to g0.
+
+ MOVL SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), CX
+ MOVL m_curg(CX), DX
+
+ CMPL AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVL m_g0(CX), DX
+ MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBL $16, SP // Space for results
+ ANDL $~15, SP // Align for C code
+
// Stack layout, depending on call path:
// x(SP) vDSO INVOKE_SYSCALL
// 12 ts.tv_nsec ts.tv_nsec
// 8 ts.tv_sec ts.tv_sec
// 4 &ts -
// 0 CLOCK_<id> -
- //
- // If we take the vDSO path, we're calling a function with gcc calling convention.
- // We're guaranteed 128 bytes on entry. We've taken 16, and the call uses another 4,
- // leaving 108 for __vdso_clock_gettime to use.
+
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0
JEQ fallback
@@ -234,6 +251,8 @@ finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
+ MOVL BP, SP // Restore real SP
+
// sec is in AX, nsec in BX
MOVL AX, sec_lo+0(FP)
MOVL $0, sec_hi+4(FP)
@@ -242,8 +261,26 @@ finish:
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
-TEXT runtime·nanotime(SB), NOSPLIT, $16
- // See comments above in walltime() about stack space usage and layout.
+TEXT runtime·nanotime(SB), NOSPLIT, $0-8
+ // Switch to g0 stack. See comment above in runtime·walltime.
+
+ MOVL SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVL g(CX), AX
+ MOVL g_m(AX), CX
+ MOVL m_curg(CX), DX
+
+ CMPL AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVL m_g0(CX), DX
+ MOVL (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBL $16, SP // Space for results
+ ANDL $~15, SP // Align for C code
+
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0
JEQ fallback
@@ -264,6 +301,8 @@ finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
+ MOVL BP, SP // Restore real SP
+
// sec is in AX, nsec in BX
// convert to DX:AX nsec
MOVL $1000000000, CX