From da564d0006e2cc286fecb3cec94ed143a2667866 Mon Sep 17 00:00:00 2001 From: qmuntal Date: Mon, 19 Sep 2022 12:19:38 +0200 Subject: runtime,cmd/internal/obj/x86: use TEB TLS slots on windows/amd64 This CL redesign how we get the TLS pointer on windows/amd64. We were previously reading it from the [TEB] arbitrary data slot, located at 0x28(GS), which can only hold 1 TLS pointer. With this CL, we will read the TLS pointer from the TEB TLS slot array, located at 0x1480(GS). The TLS slot array can hold multiple TLS pointers, up to 64, so multiple Go runtimes running on the same thread can coexists with different TLS. Each new TLS slot has to be allocated via [TlsAlloc], which returns the slot index. This index can then be used to get the slot offset from GS with the following formula: 0x1480 + index*8 The slot index is fixed per Go runtime, so we can store it in runtime.tls_g and use it latter on to read/update the TLS pointer. Loading the TLS pointer requires the following asm instructions: MOVQ runtime.tls_g, AX MOVQ AX(GS), AX Notice that this approach is also implemented on windows/arm64. [TEB]: https://en.wikipedia.org/wiki/Win32_Thread_Information_Block [TlsAlloc]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-tlsalloc Updates #22192 Change-Id: Idea7119fd76a3cd083979a4d57ed64b552fa101b Reviewed-on: https://go-review.googlesource.com/c/go/+/431775 Reviewed-by: Cherry Mui TryBot-Result: Gopher Robot Run-TryBot: Quim Muntal Reviewed-by: Michael Knyszek Reviewed-by: Alex Brainman --- src/cmd/internal/obj/x86/asm6.go | 20 +++----------------- src/cmd/internal/obj/x86/obj6.go | 21 +++++++++++++++++++-- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'src/cmd/internal') diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 9faaba3759..953eedc0d0 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -3612,7 +3612,7 @@ func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj goto bad } - if a.Index != REG_NONE && a.Index != REG_TLS { + if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) { base := int(a.Reg) switch a.Name { case obj.NAME_EXTERN, @@ -3724,7 +3724,8 @@ func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj } if REG_AX <= base && base <= REG_R15 { - if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid { + if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid && + !(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) { rel = obj.Reloc{} rel.Type = objabi.R_TLS_LE rel.Siz = 4 @@ -5205,21 +5206,6 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { ab.Put2(0x64, // FS 0x8B) ab.asmand(ctxt, cursym, p, &pp.From, &p.To) - - case objabi.Hwindows: - // Windows TLS base is always 0x28(GS). - pp.From = p.From - - pp.From.Type = obj.TYPE_MEM - pp.From.Name = obj.NAME_NONE - pp.From.Reg = REG_GS - pp.From.Offset = 0x28 - pp.From.Index = REG_NONE - pp.From.Scale = 0 - ab.rexflag |= Pw - ab.Put2(0x65, // GS - 0x8B) - ab.asmand(ctxt, cursym, p, &pp.From, &p.To) } } return diff --git a/src/cmd/internal/obj/x86/obj6.go b/src/cmd/internal/obj/x86/obj6.go index a82285a0d3..85a4260453 100644 --- a/src/cmd/internal/obj/x86/obj6.go +++ b/src/cmd/internal/obj/x86/obj6.go @@ -158,16 +158,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { } } - // Android uses a tls offset determined at runtime. Rewrite + // Android and Win64 use a tls offset determined at runtime. Rewrite // MOVQ TLS, BX // to // MOVQ runtime.tls_g(SB), BX - if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { + if (isAndroid || (ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64)) && + (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 { p.From.Type = obj.TYPE_MEM p.From.Name = obj.NAME_EXTERN p.From.Reg = REG_NONE p.From.Sym = ctxt.Lookup("runtime.tls_g") p.From.Index = REG_NONE + if ctxt.Headtype == objabi.Hwindows { + // Win64 requires an additional indirection + // to retrieve the TLS pointer, + // as runtime.tls_g contains the TLS offset from GS. + // add + // MOVQ 0(BX)(GS*1), BX + q := obj.Appendp(p, newprog) + q.As = p.As + q.From = obj.Addr{} + q.From.Type = obj.TYPE_MEM + q.From.Reg = p.To.Reg + q.From.Index = REG_GS + q.From.Scale = 1 + q.From.Offset = 0 + q.To = p.To + } } // TODO: Remove. -- cgit v1.3