aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorqmuntal <quimmuntal@gmail.com>2022-12-02 09:25:26 +0100
committerQuim Muntal <quimmuntal@gmail.com>2023-01-23 18:15:35 +0000
commit28f8dbd7b941648aea311bb0cf331f88c02441b6 (patch)
tree092c7e556a19d912f6665b9fefd0cb532b2671ae /src/runtime
parentbb5ff5342d31723ecf245e8e53b79bce23b88839 (diff)
downloadgo-28f8dbd7b941648aea311bb0cf331f88c02441b6.tar.xz
runtime,cmd/internal/obj/x86: use TEB TLS slots on windows/i386
This CL redesigns how we get the TLS pointer on windows/i386. It applies the same changes as done in CL 431775 for windows/amd64. We were previously reading it from the [TEB] arbitrary data slot, located at 0x14(FS), which can only hold 1 TLS pointer. With this CL, we will read the TLS pointer from the TEB TLS slot array, located at 0xE10(FS). The TLS slot array can hold multiple TLS pointers, up to 64, so multiple Go runtimes running on the same thread can coexist with different TLS. Each new TLS slot has to be allocated via [TlsAlloc], which returns the slot index. This index can then be used to get the slot offset from FS with the following formula: 0xE10 + index*4. The slot index is fixed per Go runtime, so we can store the resulting offset in runtime.tls_g and use it later on to read/update the TLS pointer. Loading the TLS pointer requires the following asm instructions: MOVL runtime.tls_g, AX; MOVL 0(AX)(FS), AX. Note that this approach is now implemented on all the supported Windows architectures. [TEB]: https://en.wikipedia.org/wiki/Win32_Thread_Information_Block [TlsAlloc]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-tlsalloc Change-Id: If4550b0d44694ee6480d4093b851f4991a088b32 Reviewed-on: https://go-review.googlesource.com/c/go/+/454675 Reviewed-by: Michael Pratt <mpratt@google.com> Run-TryBot: Quim Muntal <quimmuntal@gmail.com> Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/asm_386.s19
-rw-r--r--src/runtime/cgo/gcc_windows_386.c12
-rw-r--r--src/runtime/sys_windows_386.s46
3 files changed, 60 insertions, 17 deletions
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index e16880c950..02179d2ee9 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -171,8 +171,12 @@ nocpuinfo:
MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
#else
MOVL $0, BX
- MOVL BX, 12(SP) // arg 3,4: not used when using platform's TLS
- MOVL BX, 8(SP)
+ MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
+#ifdef GOOS_windows
+ MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
+#else
+ MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
+#endif
#endif
MOVL $setg_gcc<>(SB), BX
MOVL BX, 4(SP) // arg 2: setg_gcc
@@ -795,14 +799,15 @@ havem:
TEXT runtime·setg(SB), NOSPLIT, $0-4
MOVL gg+0(FP), BX
#ifdef GOOS_windows
+ MOVL runtime·tls_g(SB), CX
CMPL BX, $0
JNE settls
- MOVL $0, 0x14(FS)
+ MOVL $0, 0(CX)(FS)
RET
settls:
MOVL g_m(BX), AX
LEAL m_tls(AX), AX
- MOVL AX, 0x14(FS)
+ MOVL AX, 0(CX)(FS)
#endif
get_tls(CX)
MOVL BX, g(CX)
@@ -867,6 +872,9 @@ rdtsc:
JMP done
TEXT ldt0setup<>(SB),NOSPLIT,$16-0
+#ifdef GOOS_windows
+ CALL runtime·wintls(SB)
+#endif
// set up ldt 7 to point at m0.tls
// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
// the entry number is just a hint. setldt will set up GS with what it used.
@@ -1577,3 +1585,6 @@ TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
DATA runtime·tls_g+0(SB)/4, $8
GLOBL runtime·tls_g+0(SB), NOPTR, $4
#endif
+#ifdef GOOS_windows
+GLOBL runtime·tls_g+0(SB), NOPTR, $4
+#endif
diff --git a/src/runtime/cgo/gcc_windows_386.c b/src/runtime/cgo/gcc_windows_386.c
index 56fbaac9b8..0f4f01c7c0 100644
--- a/src/runtime/cgo/gcc_windows_386.c
+++ b/src/runtime/cgo/gcc_windows_386.c
@@ -12,10 +12,12 @@
#include "libcgo_windows.h"
static void threadentry(void*);
+static DWORD *tls_g;
void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
{
+ tls_g = (DWORD *)tlsg;
}
@@ -39,10 +41,10 @@ threadentry(void *v)
* Set specific keys in thread local storage.
*/
asm volatile (
- "movl %0, %%fs:0x14\n" // MOVL tls0, 0x14(FS)
- "movl %%fs:0x14, %%eax\n" // MOVL 0x14(FS), tmp
- "movl %1, 0(%%eax)\n" // MOVL g, 0(FS)
- :: "r"(ts.tls), "r"(ts.g) : "%eax"
+ "movl %0, %%fs:0(%1)\n" // MOVL tls0, 0(tls_g)(FS)
+ "movl %%fs:0(%1), %%eax\n" // MOVL 0(tls_g)(FS), tmp
+ "movl %2, 0(%%eax)\n" // MOVL g, 0(AX)
+ :: "r"(ts.tls), "r"(*tls_g), "r"(ts.g) : "%eax"
);
crosscall_386(ts.fn);
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index cf3a439523..8713f7d0d9 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -7,6 +7,9 @@
#include "textflag.h"
#include "time_windows.h"
+// Offsets into Thread Environment Block (pointer in FS)
+#define TEB_TlsSlots 0xE10
+
// void runtime·asmstdcall(void *c);
TEXT runtime·asmstdcall(SB),NOSPLIT,$0
MOVL fn+0(FP), BX
@@ -222,7 +225,7 @@ TEXT runtime·callbackasm1(SB),NOSPLIT,$0
RET
// void tstart(M *newm);
-TEXT tstart<>(SB),NOSPLIT,$0
+TEXT tstart<>(SB),NOSPLIT,$8-4
MOVL newm+0(FP), CX // m
MOVL m_g0(CX), DX // g
@@ -236,10 +239,11 @@ TEXT tstart<>(SB),NOSPLIT,$0
MOVL AX, g_stackguard1(DX)
// Set up tls.
- LEAL m_tls(CX), SI
- MOVL SI, 0x14(FS)
+ LEAL m_tls(CX), DI
MOVL CX, g_m(DX)
- MOVL DX, g(SI)
+ MOVL DX, g(DI)
+ MOVL DI, 4(SP)
+ CALL runtime·setldt(SB) // clobbers CX and DX
// Someday the convention will be D is always cleared.
CLD
@@ -266,10 +270,11 @@ TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
RET
-// setldt(int entry, int address, int limit)
-TEXT runtime·setldt(SB),NOSPLIT,$0
- MOVL base+4(FP), CX
- MOVL CX, 0x14(FS)
+// setldt(int slot, int base, int size)
+TEXT runtime·setldt(SB),NOSPLIT,$0-12
+ MOVL base+4(FP), DX
+ MOVL runtime·tls_g(SB), CX
+ MOVL DX, 0(CX)(FS)
RET
// Runs on OS stack.
@@ -356,3 +361,28 @@ loop:
useQPC:
JMP runtime·nanotimeQPC(SB)
RET
+
+// wintls allocates a TLS slot and stores its offset from the TEB (TEB_TlsSlots + 4*index) in tls_g.
+// This is called from rt0_go, which runs on the system stack using the initial stack allocated by the OS.
+TEXT runtime·wintls(SB),NOSPLIT|NOFRAME,$0
+ // Allocate a TLS slot to hold g across calls to external code
+ MOVL SP, BP // remember SP so it can be restored after the call
+ MOVL runtime·_TlsAlloc(SB), AX
+ CALL AX // result in AX is used below as the TLS index
+ MOVL BP, SP // restore the SP saved above
+
+ MOVL AX, CX // TLS index
+
+ // Assert that slot is less than 64 so we can use _TEB->TlsSlots
+ CMPL CX, $64
+ JB ok // unsigned compare: any index >= 64 falls through to abort
+ CALL runtime·abort(SB)
+ok:
+ // Convert the TLS index at CX into
+ // a byte offset within the slot array (index*4).
+ SHLL $2, CX
+
+ // Add the TEB_TlsSlots base and save the offset from the TEB into tls_g.
+ ADDL $TEB_TlsSlots, CX
+ MOVL CX, runtime·tls_g(SB)
+ RET