aboutsummaryrefslogtreecommitdiff
path: root/src/liblink/obj6.c
diff options
context:
space:
mode:
author: Russ Cox <rsc@golang.org> 2014-04-15 13:45:39 -0400
committer: Russ Cox <rsc@golang.org> 2014-04-15 13:45:39 -0400
commit: 90093f0634d0143c6294e827e5c83fc0818ff8aa (patch)
tree: 52618c202e298ed054116e373e1a1f652ca5083b /src/liblink/obj6.c
parent: aeb37527d3795b9677295bb21c0bbb3af18d6f31 (diff)
download: go-90093f0634d0143c6294e827e5c83fc0818ff8aa.tar.xz
liblink: introduce TLS register on 386 and amd64
When I did the original 386 ports on Linux and OS X, I chose to define GS-relative expressions like 4(GS) as relative to the actual thread-local storage base, which was usually GS but might not be (it might be FS, or it might be a different constant offset from GS or FS).

The original scope was limited but since then the rewrites have gotten out of control. Sometimes GS is rewritten, sometimes FS. Some ports do other rewrites to enable shared libraries and other linking. At no point in the code is it clear whether you are looking at the real GS/FS or some synthesized thing that will be rewritten. The code manipulating all these is duplicated in many places.

The first step to fixing issue 7719 is to make the code intelligible again.

This CL adds an explicit TLS pseudo-register to the 386 and amd64. As a register, TLS refers to the thread-local storage base, and it can only be loaded into another register:

    MOVQ TLS, AX

An offset from the thread-local storage base is written off(reg)(TLS*1). Semantically it is off(reg), but the (TLS*1) annotation marks this as indexing from the loaded TLS base. This emits a relocation so that if the linker needs to adjust the offset, it can. For example:

    MOVQ TLS, AX
    MOVQ 8(AX)(TLS*1), CX // load m into CX

On systems that support direct access to the TLS memory, this pair of instructions can be reduced to a direct TLS memory reference:

    MOVQ 8(TLS), CX // load m into CX

The 2-instruction and 1-instruction forms correspond roughly to ELF TLS initial exec mode and ELF TLS local exec mode, respectively.

Liblink applies this rewrite on systems that support the 1-instruction form. The decision is made using only the operating system (and probably the -shared flag, eventually), not the link mode. If some link modes on a particular operating system require the 2-instruction form, then all builds for that operating system will use the 2-instruction form, so that the link mode decision can be delayed to link time.
Obviously it is late to be making changes like this, but I despair of correcting issue 7719 and issue 7164 without it. To make sure I am not changing existing behavior, I built a "hello world" program for every GOOS/GOARCH combination we have and then worked to make sure that the rewrite generates exactly the same binaries, byte for byte. There are a handful of TODOs in the code marking kludges to get the byte-for-byte property, but at least now I can explain exactly how each binary is handled.

The targets I tested this way are:

    darwin-386
    darwin-amd64
    dragonfly-386
    dragonfly-amd64
    freebsd-386
    freebsd-amd64
    freebsd-arm
    linux-386
    linux-amd64
    linux-arm
    nacl-386
    nacl-amd64p32
    netbsd-386
    netbsd-amd64
    openbsd-386
    openbsd-amd64
    plan9-386
    plan9-amd64
    solaris-amd64
    windows-386
    windows-amd64

There were four exceptions to the byte-for-byte goal:

windows-386 and windows-amd64 have a time stamp at bytes 137 and 138 of the header.

darwin-386 and plan9-386 have five or six modified bytes in the middle of the Go symbol table, caused by editing comments in runtime/sys_{darwin,plan9}_386.s.

Fixes #7164.

LGTM=iant
R=iant, aram, minux.ma, dave
CC=golang-codereviews
https://golang.org/cl/87920043
Diffstat (limited to 'src/liblink/obj6.c')
-rw-r--r-- src/liblink/obj6.c 234
1 files changed, 107 insertions, 127 deletions
diff --git a/src/liblink/obj6.c b/src/liblink/obj6.c
index b4329e8862..fbb96c5e9e 100644
--- a/src/liblink/obj6.c
+++ b/src/liblink/obj6.c
@@ -99,6 +99,17 @@ settextflag(Prog *p, int f)
static void nacladdr(Link*, Prog*, Addr*);
+static int
+canuselocaltls(Link *ctxt)
+{
+ switch(ctxt->headtype) {
+// case Hlinux:
+ case Hwindows:
+ return 0;
+ }
+ return 1;
+}
+
static void
progedit(Link *ctxt, Prog *p)
{
@@ -106,105 +117,98 @@ progedit(Link *ctxt, Prog *p)
LSym *s;
Prog *q;
- if(ctxt->headtype == Hnacl) {
- nacladdr(ctxt, p, &p->from);
- nacladdr(ctxt, p, &p->to);
- }
-
- if(p->from.type == D_INDIR+D_GS || p->from.index == D_GS)
- p->from.offset += ctxt->tlsoffset;
- if(p->to.type == D_INDIR+D_GS || p->to.index == D_GS)
- p->to.offset += ctxt->tlsoffset;
+ // Thread-local storage references use the TLS pseudo-register.
+ // As a register, TLS refers to the thread-local storage base, and it
+ // can only be loaded into another register:
+ //
+ // MOVQ TLS, AX
+ //
+ // An offset from the thread-local storage base is written off(reg)(TLS*1).
+ // Semantically it is off(reg), but the (TLS*1) annotation marks this as
+ // indexing from the loaded TLS base. This emits a relocation so that
+ // if the linker needs to adjust the offset, it can. For example:
+ //
+ // MOVQ TLS, AX
+ // MOVQ 8(AX)(TLS*1), CX // load m into CX
+ //
+ // On systems that support direct access to the TLS memory, this
+ // pair of instructions can be reduced to a direct TLS memory reference:
+ //
+ // MOVQ 8(TLS), CX // load m into CX
+ //
+ // The 2-instruction and 1-instruction forms correspond roughly to
+ // ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
+ //
+ // We apply this rewrite on systems that support the 1-instruction form.
+ // The decision is made using only the operating system (and probably
+ // the -shared flag, eventually), not the link mode. If some link modes
+ // on a particular operating system require the 2-instruction form,
+ // then all builds for that operating system will use the 2-instruction
+ // form, so that the link mode decision can be delayed to link time.
+ //
+ // In this way, all supported systems use identical instructions to
+ // access TLS, and they are rewritten appropriately first here in
+ // liblink and then finally using relocations in the linker.
- if(ctxt->gmsym == nil)
- ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0);
-
- if(ctxt->headtype == Hwindows) {
- // Windows
- // Convert
- // op n(GS), reg
- // to
- // MOVL 0x28(GS), reg
- // op n(reg), reg
- // The purpose of this patch is to fix some accesses
- // to extern register variables (TLS) on Windows, as
- // a different method is used to access them.
- if(p->from.type == D_INDIR+D_GS
- && p->to.type >= D_AX && p->to.type <= D_DI
- && p->from.offset <= 8) {
- q = appendp(ctxt, p);
- q->from = p->from;
- q->from.type = D_INDIR + p->to.type;
- q->to = p->to;
- q->as = p->as;
- p->as = AMOVQ;
- p->from.type = D_INDIR+D_GS;
- p->from.offset = 0x28;
- }
- }
- if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd
- || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd
- || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly
- || ctxt->headtype == Hsolaris) {
- // ELF uses FS instead of GS.
- if(p->from.type == D_INDIR+D_GS)
- p->from.type = D_INDIR+D_FS;
- if(p->to.type == D_INDIR+D_GS)
- p->to.type = D_INDIR+D_FS;
- if(p->from.index == D_GS)
- p->from.index = D_FS;
- if(p->to.index == D_GS)
- p->to.index = D_FS;
- }
- if(!ctxt->flag_shared) {
- // Convert g() or m() accesses of the form
- // op n(reg)(GS*1), reg
- // to
- // op n(GS*1), reg
- if(p->from.index == D_FS || p->from.index == D_GS) {
- p->from.type = D_INDIR + p->from.index;
+ if(canuselocaltls(ctxt)) {
+ // Reduce TLS initial exec model to TLS local exec model.
+ // Sequences like
+ // MOVQ TLS, BX
+ // ... off(BX)(TLS*1) ...
+ // become
+ // NOP
+ // ... off(TLS) ...
+ //
+ // TODO(rsc): Remove the Hsolaris special case. It exists only to
+ // guarantee we are producing byte-identical binaries as before this code.
+ // But it should be unnecessary.
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris)
+ nopout(p);
+ if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) {
+ p->from.type = D_INDIR+D_TLS;
+ p->from.scale = 0;
p->from.index = D_NONE;
}
- // Convert g() or m() accesses of the form
- // op reg, n(reg)(GS*1)
- // to
- // op reg, n(GS*1)
- if(p->to.index == D_FS || p->to.index == D_GS) {
- p->to.type = D_INDIR + p->to.index;
+ if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) {
+ p->to.type = D_INDIR+D_TLS;
+ p->to.scale = 0;
p->to.index = D_NONE;
}
- // Convert get_tls access of the form
- // op runtime.tlsgm(SB), reg
- // to
- // NOP
- if(ctxt->gmsym != nil && p->from.sym == ctxt->gmsym) {
- p->as = ANOP;
- p->from.type = D_NONE;
- p->to.type = D_NONE;
- p->from.sym = nil;
- p->to.sym = nil;
- }
} else {
- // Convert TLS reads of the form
- // op n(GS), reg
- // to
- // MOVQ $runtime.tlsgm(SB), reg
- // op n(reg)(GS*1), reg
- if((p->from.type == D_INDIR+D_FS || p->from.type == D_INDIR + D_GS) && p->to.type >= D_AX && p->to.type <= D_DI) {
+ // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load.
+ // The instruction
+ // MOVQ off(TLS), BX
+ // becomes the sequence
+ // MOVQ TLS, BX
+ // MOVQ off(BX)(TLS*1), BX
+ // This allows the C compilers to emit references to m and g using the direct off(TLS) form.
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) {
q = appendp(ctxt, p);
- q->to = p->to;
q->as = p->as;
- q->from.type = D_INDIR+p->to.type;
- q->from.index = p->from.type - D_INDIR;
- q->from.scale = 1;
- q->from.offset = p->from.offset;
- p->as = AMOVQ;
- p->from.type = D_EXTERN;
- p->from.sym = ctxt->gmsym;
+ q->from = p->from;
+ q->from.type = D_INDIR + p->to.type;
+ q->from.index = D_TLS;
+ q->from.scale = 2; // TODO: use 1
+ q->to = p->to;
+ p->from.type = D_TLS;
+ p->from.index = D_NONE;
p->from.offset = 0;
}
}
+ // TODO: Remove.
+ if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) {
+ if(p->from.scale == 1 && p->from.index == D_TLS)
+ p->from.scale = 2;
+ if(p->to.scale == 1 && p->to.index == D_TLS)
+ p->to.scale = 2;
+ }
+
+ if(ctxt->headtype == Hnacl) {
+ nacladdr(ctxt, p, &p->from);
+ nacladdr(ctxt, p, &p->to);
+ }
+
// Maintain information about code generation mode.
if(ctxt->mode == 0)
ctxt->mode = 64;
@@ -315,9 +319,9 @@ nacladdr(Link *ctxt, Prog *p, Addr *a)
ctxt->diag("invalid address: %P", p);
return;
}
- if(a->type == D_INDIR+D_GS)
+ if(a->type == D_INDIR+D_TLS)
a->type = D_INDIR+D_BP;
- else if(a->type == D_GS)
+ else if(a->type == D_TLS)
a->type = D_BP;
if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) {
switch(a->type) {
@@ -632,48 +636,24 @@ indir_cx(Link *ctxt, Addr *a)
// Returns last new instruction.
static Prog*
load_g_cx(Link *ctxt, Prog *p)
-{
- if(ctxt->flag_shared) {
- // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX
- p->as = AMOVQ;
- p->from.type = D_EXTERN;
- p->from.sym = ctxt->gmsym;
- p->to.type = D_CX;
- p = appendp(ctxt, p);
- }
+{
+ Prog *next;
+
p->as = AMOVQ;
- if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd
- || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd
- || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly
- || ctxt->headtype == Hsolaris)
- // ELF uses FS
- p->from.type = D_INDIR+D_FS;
- else if(ctxt->headtype == Hnacl) {
+ if(ctxt->arch->ptrsize == 4)
p->as = AMOVL;
- p->from.type = D_INDIR+D_BP;
- } else
- p->from.type = D_INDIR+D_GS;
- if(ctxt->flag_shared) {
- // Add TLS offset stored in CX
- p->from.index = p->from.type - D_INDIR;
- indir_cx(ctxt, &p->from);
- }
- p->from.offset = ctxt->tlsoffset+0;
+ p->from.type = D_INDIR+D_TLS;
+ p->from.offset = 0;
p->to.type = D_CX;
- if(ctxt->headtype == Hwindows) {
- // movq %gs:0x28, %rcx
- // movq (%rcx), %rcx
- p->as = AMOVQ;
- p->from.type = D_INDIR+D_GS;
- p->from.offset = 0x28;
- p->to.type = D_CX;
+
+ next = p->link;
+ progedit(ctxt, p);
+ while(p->link != next)
+ p = p->link;
+
+ if(p->from.index == D_TLS)
+ p->from.scale = 2;
- p = appendp(ctxt, p);
- p->as = AMOVQ;
- indir_cx(ctxt, &p->from);
- p->from.offset = 0;
- p->to.type = D_CX;
- }
return p;
}