diff options
| author | qmuntal <quimmuntal@gmail.com> | 2022-12-02 09:25:26 +0100 |
|---|---|---|
| committer | Quim Muntal <quimmuntal@gmail.com> | 2023-01-23 18:15:35 +0000 |
| commit | 28f8dbd7b941648aea311bb0cf331f88c02441b6 (patch) | |
| tree | 092c7e556a19d912f6665b9fefd0cb532b2671ae /src/cmd | |
| parent | bb5ff5342d31723ecf245e8e53b79bce23b88839 (diff) | |
| download | go-28f8dbd7b941648aea311bb0cf331f88c02441b6.tar.xz | |
runtime,cmd/internal/obj/x86: use TEB TLS slots on windows/i386
This CL redesigns how we get the TLS pointer on windows/i386.
It applies the same changes as done in CL 431775 for windows/amd64.
We were previously reading it from the [TEB] arbitrary data slot,
located at 0x14(FS), which can only hold 1 TLS pointer.
With this CL, we will read the TLS pointer from the TEB TLS slot array,
located at 0xE10(FS). The TLS slot array can hold multiple
TLS pointers, up to 64, so multiple Go runtimes running on the
same thread can coexist with different TLS.
Each new TLS slot has to be allocated via [TlsAlloc],
which returns the slot index. This index can then be used to get the
slot offset from FS with the following formula: 0xE10 + index*4.
The slot index is fixed per Go runtime, so we can store it
in runtime.tls_g and use it later on to read/update the TLS pointer.
Loading the TLS pointer requires the following asm instructions:
MOVL runtime.tls_g(SB), AX
MOVL 0(AX)(FS*1), AX
Note that with this change the same approach is used on all
supported Windows architectures.
[TEB]: https://en.wikipedia.org/wiki/Win32_Thread_Information_Block
[TlsAlloc]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-tlsalloc
Change-Id: If4550b0d44694ee6480d4093b851f4991a088b32
Reviewed-on: https://go-review.googlesource.com/c/go/+/454675
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/internal/obj/x86/asm6.go | 46 | ||||
| -rw-r--r-- | src/cmd/internal/obj/x86/obj6.go | 18 |
2 files changed, 24 insertions, 40 deletions
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 953eedc0d0..de08b42ab5 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2551,22 +2551,6 @@ func prefixof(ctxt *obj.Link, a *obj.Addr) int { } } - if ctxt.Arch.Family == sys.I386 { - if a.Index == REG_TLS && ctxt.Flag_shared { - // When building for inclusion into a shared library, an instruction of the form - // MOVL off(CX)(TLS*1), AX - // becomes - // mov %gs:off(%ecx), %eax - // which assumes that the correct TLS offset has been loaded into %ecx (today - // there is only one TLS variable -- g -- so this is OK). When not building for - // a shared library the instruction it becomes - // mov 0x0(%ecx), %eax - // and a R_TLS_LE relocation, and so does not require a prefix. - return 0x65 // GS - } - return 0 - } - switch a.Index { case REG_CS: return 0x2e @@ -2582,11 +2566,18 @@ func prefixof(ctxt *obj.Link, a *obj.Addr) int { // When building for inclusion into a shared library, an instruction of the form // MOV off(CX)(TLS*1), AX // becomes - // mov %fs:off(%rcx), %rax - // which assumes that the correct TLS offset has been loaded into %rcx (today + // mov %gs:off(%ecx), %eax // on i386 + // mov %fs:off(%rcx), %rax // on amd64 + // which assumes that the correct TLS offset has been loaded into CX (today // there is only one TLS variable -- g -- so this is OK). When not building for - // a shared library the instruction does not require a prefix. - return 0x64 + // a shared library the instruction it becomes + // mov 0x0(%ecx), %eax // on i386 + // mov 0x0(%rcx), %rax // on amd64 + // and a R_TLS_LE relocation, and so does not require a prefix. 
+ if ctxt.Arch.Family == sys.I386 { + return 0x65 // GS + } + return 0x64 // FS } case REG_FS: @@ -3725,7 +3716,7 @@ func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj if REG_AX <= base && base <= REG_R15 { if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid && - !(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) { + ctxt.Headtype != objabi.Hwindows { rel = obj.Reloc{} rel.Type = objabi.R_TLS_LE rel.Siz = 4 @@ -5137,19 +5128,6 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { pp.From.Index = REG_NONE ab.Put1(0x8B) ab.asmand(ctxt, cursym, p, &pp.From, &p.To) - - case objabi.Hwindows: - // Windows TLS base is always 0x14(FS). - pp.From = p.From - - pp.From.Type = obj.TYPE_MEM - pp.From.Reg = REG_FS - pp.From.Offset = 0x14 - pp.From.Index = REG_NONE - pp.From.Scale = 0 - ab.Put2(0x64, // FS - 0x8B) - ab.asmand(ctxt, cursym, p, &pp.From, &p.To) } break } diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index 85a4260453..a071762681 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -158,11 +158,11 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } } - // Android and Win64 use a tls offset determined at runtime. Rewrite + // Android and Windows use a tls offset determined at runtime. 
Rewrite // MOVQ TLS, BX // to // MOVQ runtime.tls_g(SB), BX - if (isAndroid || (ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64)) && + if (isAndroid || ctxt.Headtype == objabi.Hwindows) && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN @@ -170,17 +170,23 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { p.From.Sym = ctxt.Lookup("runtime.tls_g") p.From.Index = REG_NONE if ctxt.Headtype == objabi.Hwindows { - // Win64 requires an additional indirection + // Windows requires an additional indirection // to retrieve the TLS pointer, - // as runtime.tls_g contains the TLS offset from GS. - // add + // as runtime.tls_g contains the TLS offset from GS or FS. + // on AMD64 add // MOVQ 0(BX)(GS*1), BX + // on 386 add + // MOVQ 0(BX)(FS*1), BX4 q := obj.Appendp(p, newprog) q.As = p.As q.From = obj.Addr{} q.From.Type = obj.TYPE_MEM q.From.Reg = p.To.Reg - q.From.Index = REG_GS + if ctxt.Arch.Family == sys.AMD64 { + q.From.Index = REG_GS + } else { + q.From.Index = REG_FS + } q.From.Scale = 1 q.From.Offset = 0 q.To = p.To |
