diff options
| author | Russ Cox <rsc@golang.org> | 2014-04-15 13:45:39 -0400 |
|---|---|---|
| committer | Russ Cox <rsc@golang.org> | 2014-04-15 13:45:39 -0400 |
| commit | 90093f0634d0143c6294e827e5c83fc0818ff8aa (patch) | |
| tree | 52618c202e298ed054116e373e1a1f652ca5083b /src/liblink | |
| parent | aeb37527d3795b9677295bb21c0bbb3af18d6f31 (diff) | |
| download | go-90093f0634d0143c6294e827e5c83fc0818ff8aa.tar.xz | |
liblink: introduce TLS register on 386 and amd64
When I did the original 386 ports on Linux and OS X, I chose to
define GS-relative expressions like 4(GS) as relative to the actual
thread-local storage base, which was usually GS but might not be
(it might be FS, or it might be a different constant offset from GS or FS).
The original scope was limited but since then the rewrites have
gotten out of control. Sometimes GS is rewritten, sometimes FS.
Some ports do other rewrites to enable shared libraries and
other linking. At no point in the code is it clear whether you are
looking at the real GS/FS or some synthesized thing that will be
rewritten. The code manipulating all these is duplicated in many
places.
The first step to fixing issue 7719 is to make the code intelligible
again.
This CL adds an explicit TLS pseudo-register to the 386 and amd64.
As a register, TLS refers to the thread-local storage base, and it
can only be loaded into another register:
	MOVQ TLS, AX
An offset from the thread-local storage base is written off(reg)(TLS*1).
Semantically it is off(reg), but the (TLS*1) annotation marks this as
indexing from the loaded TLS base. This emits a relocation so that
if the linker needs to adjust the offset, it can. For example:
	MOVQ TLS, AX
	MOVQ 8(AX)(TLS*1), CX // load m into CX
On systems that support direct access to the TLS memory, this
pair of instructions can be reduced to a direct TLS memory reference:
	MOVQ 8(TLS), CX // load m into CX
The 2-instruction and 1-instruction forms correspond roughly to
ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
Liblink applies this rewrite on systems that support the 1-instruction form.
The decision is made using only the operating system (and probably
the -shared flag, eventually), not the link mode. If some link modes
on a particular operating system require the 2-instruction form,
then all builds for that operating system will use the 2-instruction
form, so that the link mode decision can be delayed to link time.
Obviously it is late to be making changes like this, but I despair
of correcting issue 7719 and issue 7164 without it. To make sure
I am not changing existing behavior, I built a "hello world" program
for every GOOS/GOARCH combination we have and then worked
to make sure that the rewrite generates exactly the same binaries,
byte for byte. There are a handful of TODOs in the code marking
kludges to get the byte-for-byte property, but at least now I can
explain exactly how each binary is handled.
The targets I tested this way are:
	darwin-386
	darwin-amd64
	dragonfly-386
	dragonfly-amd64
	freebsd-386
	freebsd-amd64
	freebsd-arm
	linux-386
	linux-amd64
	linux-arm
	nacl-386
	nacl-amd64p32
	netbsd-386
	netbsd-amd64
	openbsd-386
	openbsd-amd64
	plan9-386
	plan9-amd64
	solaris-amd64
	windows-386
	windows-amd64
There were four exceptions to the byte-for-byte goal:
	windows-386 and windows-amd64 have a time stamp
	at bytes 137 and 138 of the header.

	darwin-386 and plan9-386 have five or six modified
	bytes in the middle of the Go symbol table, caused by
	editing comments in runtime/sys_{darwin,plan9}_386.s.
Fixes #7164.
LGTM=iant
R=iant, aram, minux.ma, dave
CC=golang-codereviews
https://golang.org/cl/87920043
Diffstat (limited to 'src/liblink')
| -rw-r--r-- | src/liblink/asm6.c | 114 | ||||
| -rw-r--r-- | src/liblink/asm8.c | 126 | ||||
| -rw-r--r-- | src/liblink/list6.c | 1 | ||||
| -rw-r--r-- | src/liblink/list8.c | 1 | ||||
| -rw-r--r-- | src/liblink/obj6.c | 234 | ||||
| -rw-r--r-- | src/liblink/obj8.c | 183 | ||||
| -rw-r--r-- | src/liblink/objfile.c | 6 | ||||
| -rw-r--r-- | src/liblink/sym.c | 1 |
8 files changed, 412 insertions, 254 deletions
diff --git a/src/liblink/asm6.c b/src/liblink/asm6.c index 213b1b55d7..104a08e21e 100644 --- a/src/liblink/asm6.c +++ b/src/liblink/asm6.c @@ -114,6 +114,7 @@ enum Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Yrl32, Yrl64, Ymr, Ymm, Yxr, Yxm, + Ytls, Ymax, Zxxx = 0, @@ -1871,7 +1872,7 @@ instinit(void) } static int -prefixof(Addr *a) +prefixof(Link *ctxt, Addr *a) { switch(a->type) { case D_INDIR+D_CS: @@ -1884,6 +1885,27 @@ prefixof(Addr *a) return 0x64; case D_INDIR+D_GS: return 0x65; + case D_INDIR+D_TLS: + // NOTE: Systems listed here should be only systems that + // support direct TLS references like 8(TLS) implemented as + // direct references from FS or GS. Systems that require + // the initial-exec model, where you load the TLS base into + // a register and then index from that register, do not reach + // this code and should not be listed. + switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype)); + case Hdragonfly: + case Hfreebsd: + case Hlinux: + case Hnetbsd: + case Hopenbsd: + case Hplan9: + case Hsolaris: + return 0x64; // FS + case Hdarwin: + return 0x65; // GS + } } switch(a->index) { case D_CS: @@ -2033,6 +2055,7 @@ oclass(Link *ctxt, Addr *a) case D_ES: return Yes; case D_FS: return Yfs; case D_GS: return Ygs; + case D_TLS: return Ytls; case D_GDTR: return Ygdtr; case D_IDTR: return Yidtr; @@ -2278,6 +2301,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r) r->type = R_PCREL; } else r->type = R_ADDR; + break; + + case D_INDIR+D_TLS: + if(r == nil) { + ctxt->diag("need reloc for %D", a); + sysfatal("reloc"); + } + r->type = R_TLS_LE; + r->siz = 4; + r->off = -1; // caller must fill in + r->add = v; + v = 0; + break; } return v; } @@ -2294,7 +2330,7 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) v = a->offset; t = a->type; rel.siz = 0; - if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) { + if(a->index != D_NONE && a->index != D_TLS) { if(t < D_INDIR) { switch(t) { default: @@ 
-2360,9 +2396,11 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) scale = 1; } else t -= D_INDIR; + if(t == D_TLS) + v = vaddr(ctxt, a, &rel); ctxt->rexflag |= (regrex[t] & Rxb) | rex; - if(t == D_NONE || (D_CS <= t && t <= D_GS)) { + if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) { if((ctxt->flag_shared || ctxt->headtype == Hnacl) && t == D_NONE && (a->type == D_STATIC || a->type == D_EXTERN) || ctxt->asmode != 64) { *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3); goto putrelv; @@ -2389,17 +2427,38 @@ asmandsz(Link *ctxt, Addr *a, int r, int rex, int m64) goto putrelv; } if(t >= D_AX && t <= D_R15) { - if(v == 0 && t != D_BP && t != D_R13) { + // TODO: Remove Hwindows condition. + if(v == 0 && t != D_BP && t != D_R13 && (a->index != D_TLS || (ctxt->headtype == Hwindows && a->scale == 2))) { *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); return; } - if(v >= -128 && v < 128) { + if(v >= -128 && v < 128 && (a->index != D_TLS || a->scale != 1)) { ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3); + if(a->index == D_TLS) { + Reloc *r; + memset(&rel, 0, sizeof rel); + rel.type = R_TLS_IE; + rel.siz = 1; + rel.sym = nil; + rel.add = v; + r = addrel(ctxt->cursym); + *r = rel; + r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and; + v = 0; + } ctxt->andptr[1] = v; ctxt->andptr += 2; return; } *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); + if(a->index == D_TLS) { + memset(&rel, 0, sizeof rel); + rel.type = R_TLS_IE; + rel.siz = 4; + rel.sym = nil; + rel.add = v; + v = 0; + } goto putrelv; } goto bad; @@ -2574,6 +2633,10 @@ static Movtab ymovtab[] = {ASHRQ, Ycol, Yml, 6, Pw,0xac,0xad,0}, {ASHLW, Ycol, Yml, 6, Pe,0xa4,0xa5,0}, {ASHRW, Ycol, Yml, 6, Pe,0xac,0xad,0}, + +/* load TLS base */ + {AMOVQ, Ytls, Yrl, 7, 0,0,0,0}, + 0 }; @@ -2664,10 +2727,10 @@ doasm(Link *ctxt, Prog *p) return; } - pre = prefixof(&p->from); + pre = prefixof(ctxt, &p->from); if(pre) *ctxt->andptr++ = pre; - pre = prefixof(&p->to); + pre = prefixof(ctxt, 
&p->to); if(pre) *ctxt->andptr++ = pre; @@ -3296,6 +3359,43 @@ mfound: break; } break; + + case 7: /* mov tls, r */ + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. + switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype)); + + case Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c. + // TLS base is 0(FS). + pp.from = p->from; + pp.from.type = D_INDIR+D_NONE; + pp.from.offset = 0; + pp.from.index = D_NONE; + pp.from.scale = 0; + ctxt->rexflag |= Pw; + *ctxt->andptr++ = 0x64; // FS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, &p->to); + break; + + case Hwindows: + // Windows TLS base is always 0x28(GS). + pp.from = p->from; + pp.from.type = D_INDIR+D_GS; + pp.from.offset = 0x28; + pp.from.index = D_NONE; + pp.from.scale = 0; + ctxt->rexflag |= Pw; + *ctxt->andptr++ = 0x65; // GS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, &p->to); + break; + } + break; } } diff --git a/src/liblink/asm8.c b/src/liblink/asm8.c index 2bf6707e1e..943db80f2f 100644 --- a/src/liblink/asm8.c +++ b/src/liblink/asm8.c @@ -78,6 +78,7 @@ enum Ym, Ybr, Ycol, + Ytls, Ycs, Yss, Yds, Yes, Yfs, Ygs, Ygdtr, Yidtr, Yldtr, Ymsw, Ytask, @@ -1441,7 +1442,7 @@ instinit(void) } static int -prefixof(Addr *a) +prefixof(Link *ctxt, Addr *a) { switch(a->type) { case D_INDIR+D_CS: @@ -1454,6 +1455,23 @@ prefixof(Addr *a) return 0x64; case D_INDIR+D_GS: return 0x65; + case D_INDIR+D_TLS: + // NOTE: Systems listed here should be only systems that + // support direct TLS references like 8(TLS) implemented as + // direct references from FS or GS. 
Systems that require + // the initial-exec model, where you load the TLS base into + // a register and then index from that register, do not reach + // this code and should not be listed. + switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base register for %s", headstr(ctxt->headtype)); + case Hdarwin: + case Hdragonfly: + case Hfreebsd: + case Hnetbsd: + case Hopenbsd: + return 0x65; // GS + } } return 0; } @@ -1543,6 +1561,7 @@ oclass(Addr *a) case D_ES: return Yes; case D_FS: return Yfs; case D_GS: return Ygs; + case D_TLS: return Ytls; case D_GDTR: return Ygdtr; case D_IDTR: return Yidtr; @@ -1724,6 +1743,19 @@ vaddr(Link *ctxt, Addr *a, Reloc *r) r->add = v; v = 0; } + break; + + case D_INDIR+D_TLS: + if(r == nil) { + ctxt->diag("need reloc for %D", a); + sysfatal("bad code"); + } + r->type = R_TLS_LE; + r->siz = 4; + r->off = -1; // caller must fill in + r->add = v; + v = 0; + break; } return v; } @@ -1738,7 +1770,7 @@ asmand(Link *ctxt, Addr *a, int r) v = a->offset; t = a->type; rel.siz = 0; - if(a->index != D_NONE && a->index != D_FS && a->index != D_GS) { + if(a->index != D_NONE && a->index != D_TLS) { if(t < D_INDIR || t >= 2*D_INDIR) { switch(t) { default: @@ -1801,8 +1833,10 @@ asmand(Link *ctxt, Addr *a, int r) scale = 1; } else t -= D_INDIR; + if(t == D_TLS) + v = vaddr(ctxt, a, &rel); - if(t == D_NONE || (D_CS <= t && t <= D_GS)) { + if(t == D_NONE || (D_CS <= t && t <= D_GS) || t == D_TLS) { *ctxt->andptr++ = (0 << 6) | (5 << 0) | (r << 3); goto putrelv; } @@ -1823,17 +1857,43 @@ asmand(Link *ctxt, Addr *a, int r) goto putrelv; } if(t >= D_AX && t <= D_DI) { - if(v == 0 && rel.siz == 0 && t != D_BP) { + // TODO(rsc): Remove the Hwindows test. + // As written it produces the same byte-identical output as the code it replaced. 
+ if(v == 0 && rel.siz == 0 && t != D_BP && (a->index != D_TLS || ctxt->headtype == Hwindows)) { *ctxt->andptr++ = (0 << 6) | (reg[t] << 0) | (r << 3); return; } - if(v >= -128 && v < 128 && rel.siz == 0 && a->index != D_FS && a->index != D_GS) { + // TODO(rsc): Change a->index tests to check D_TLS. + // Then remove the if statement inside the body. + // As written the code is clearly incorrect for external linking, + // but as written it produces the same byte-identical output as the code it replaced. + if(v >= -128 && v < 128 && rel.siz == 0 && (a->index != D_TLS || ctxt->headtype == Hwindows || a->scale != 1)) { ctxt->andptr[0] = (1 << 6) | (reg[t] << 0) | (r << 3); + if(a->index == D_TLS) { + Reloc *r; + memset(&rel, 0, sizeof rel); + rel.type = R_TLS_IE; + rel.siz = 1; + rel.sym = nil; + rel.add = v; + r = addrel(ctxt->cursym); + *r = rel; + r->off = ctxt->curp->pc + ctxt->andptr + 1 - ctxt->and; + v = 0; + } ctxt->andptr[1] = v; ctxt->andptr += 2; return; } *ctxt->andptr++ = (2 << 6) | (reg[t] << 0) | (r << 3); + if(a->index == D_TLS) { + memset(&rel, 0, sizeof rel); + rel.type = R_TLS_IE; + rel.siz = 4; + rel.sym = nil; + rel.add = v; + v = 0; + } goto putrelv; } goto bad; @@ -1961,6 +2021,10 @@ static uchar ymovtab[] = /* extra imul */ AIMULW, Yml, Yrl, 7, Pq,0xaf,0,0, AIMULL, Yml, Yrl, 7, Pm,0xaf,0,0, + +/* load TLS base pointer */ + AMOVL, Ytls, Yrl, 8, 0,0,0,0, + 0 }; @@ -2108,10 +2172,10 @@ doasm(Link *ctxt, Prog *p) ctxt->curp = p; // TODO - pre = prefixof(&p->from); + pre = prefixof(ctxt, &p->from); if(pre) *ctxt->andptr++ = pre; - pre = prefixof(&p->to); + pre = prefixof(ctxt, &p->to); if(pre) *ctxt->andptr++ = pre; @@ -2628,6 +2692,54 @@ mfound: *ctxt->andptr++ = t[5]; asmand(ctxt, &p->from, reg[p->to.type]); break; + + case 8: /* mov tls, r */ + // NOTE: The systems listed here are the ones that use the "TLS initial exec" model, + // where you load the TLS base register into a register and then index off that + // register to access the actual TLS 
variables. Systems that allow direct TLS access + // are handled in prefixof above and should not be listed here. + switch(ctxt->headtype) { + default: + sysfatal("unknown TLS base location for %s", headstr(ctxt->headtype)); + + case Hlinux: + case Hnacl: + // ELF TLS base is 0(GS). + pp.from = p->from; + pp.from.type = D_INDIR+D_GS; + pp.from.offset = 0; + pp.from.index = D_NONE; + pp.from.scale = 0; + *ctxt->andptr++ = 0x65; // GS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, reg[p->to.type]); + break; + + case Hplan9: + if(ctxt->plan9tos == nil) + ctxt->plan9tos = linklookup(ctxt, "_tos", 0); + memset(&pp.from, 0, sizeof pp.from); + pp.from.type = D_EXTERN; + pp.from.sym = ctxt->plan9tos; + pp.from.offset = 0; + pp.from.index = D_NONE; + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, reg[p->to.type]); + break; + + case Hwindows: + // Windows TLS base is always 0x14(FS). + pp.from = p->from; + pp.from.type = D_INDIR+D_FS; + pp.from.offset = 0x14; + pp.from.index = D_NONE; + pp.from.scale = 0; + *ctxt->andptr++ = 0x64; // FS + *ctxt->andptr++ = 0x8B; + asmand(ctxt, &pp.from, reg[p->to.type]); + break; + } + break; } } diff --git a/src/liblink/list6.c b/src/liblink/list6.c index eaf52f2e73..fe708d8774 100644 --- a/src/liblink/list6.c +++ b/src/liblink/list6.c @@ -341,6 +341,7 @@ char* regstr[] = "TR6", "TR7", + "TLS", /* [D_TLS] */ "NONE", /* [D_NONE] */ }; diff --git a/src/liblink/list8.c b/src/liblink/list8.c index c000ce25b0..7866924163 100644 --- a/src/liblink/list8.c +++ b/src/liblink/list8.c @@ -289,6 +289,7 @@ char* regstr[] = "X6", "X7", + "TLS", /* [D_TLS] */ "NONE", /* [D_NONE] */ }; diff --git a/src/liblink/obj6.c b/src/liblink/obj6.c index b4329e8862..fbb96c5e9e 100644 --- a/src/liblink/obj6.c +++ b/src/liblink/obj6.c @@ -99,6 +99,17 @@ settextflag(Prog *p, int f) static void nacladdr(Link*, Prog*, Addr*); +static int +canuselocaltls(Link *ctxt) +{ + switch(ctxt->headtype) { +// case Hlinux: + case Hwindows: + return 0; + } + return 1; +} + 
static void progedit(Link *ctxt, Prog *p) { @@ -106,105 +117,98 @@ progedit(Link *ctxt, Prog *p) LSym *s; Prog *q; - if(ctxt->headtype == Hnacl) { - nacladdr(ctxt, p, &p->from); - nacladdr(ctxt, p, &p->to); - } - - if(p->from.type == D_INDIR+D_GS || p->from.index == D_GS) - p->from.offset += ctxt->tlsoffset; - if(p->to.type == D_INDIR+D_GS || p->to.index == D_GS) - p->to.offset += ctxt->tlsoffset; + // Thread-local storage references use the TLS pseudo-register. + // As a register, TLS refers to the thread-local storage base, and it + // can only be loaded into another register: + // + // MOVQ TLS, AX + // + // An offset from the thread-local storage base is written off(reg)(TLS*1). + // Semantically it is off(reg), but the (TLS*1) annotation marks this as + // indexing from the loaded TLS base. This emits a relocation so that + // if the linker needs to adjust the offset, it can. For example: + // + // MOVQ TLS, AX + // MOVQ 8(AX)(TLS*1), CX // load m into CX + // + // On systems that support direct access to the TLS memory, this + // pair of instructions can be reduced to a direct TLS memory reference: + // + // MOVQ 8(TLS), CX // load m into CX + // + // The 2-instruction and 1-instruction forms correspond roughly to + // ELF TLS initial exec mode and ELF TLS local exec mode, respectively. + // + // We applies this rewrite on systems that support the 1-instruction form. + // The decision is made using only the operating system (and probably + // the -shared flag, eventually), not the link mode. If some link modes + // on a particular operating system require the 2-instruction form, + // then all builds for that operating system will use the 2-instruction + // form, so that the link mode decision can be delayed to link time. + // + // In this way, all supported systems use identical instructions to + // access TLS, and they are rewritten appropriately first here in + // liblink and then finally using relocations in the linker. 
- if(ctxt->gmsym == nil) - ctxt->gmsym = linklookup(ctxt, "runtime.tlsgm", 0); - - if(ctxt->headtype == Hwindows) { - // Windows - // Convert - // op n(GS), reg - // to - // MOVL 0x28(GS), reg - // op n(reg), reg - // The purpose of this patch is to fix some accesses - // to extern register variables (TLS) on Windows, as - // a different method is used to access them. - if(p->from.type == D_INDIR+D_GS - && p->to.type >= D_AX && p->to.type <= D_DI - && p->from.offset <= 8) { - q = appendp(ctxt, p); - q->from = p->from; - q->from.type = D_INDIR + p->to.type; - q->to = p->to; - q->as = p->as; - p->as = AMOVQ; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0x28; - } - } - if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd - || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd - || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly - || ctxt->headtype == Hsolaris) { - // ELF uses FS instead of GS. - if(p->from.type == D_INDIR+D_GS) - p->from.type = D_INDIR+D_FS; - if(p->to.type == D_INDIR+D_GS) - p->to.type = D_INDIR+D_FS; - if(p->from.index == D_GS) - p->from.index = D_FS; - if(p->to.index == D_GS) - p->to.index = D_FS; - } - if(!ctxt->flag_shared) { - // Convert g() or m() accesses of the form - // op n(reg)(GS*1), reg - // to - // op n(GS*1), reg - if(p->from.index == D_FS || p->from.index == D_GS) { - p->from.type = D_INDIR + p->from.index; + if(canuselocaltls(ctxt)) { + // Reduce TLS initial exec model to TLS local exec model. + // Sequences like + // MOVQ TLS, BX + // ... off(BX)(TLS*1) ... + // become + // NOP + // ... off(TLS) ... + // + // TODO(rsc): Remove the Hsolaris special case. It exists only to + // guarantee we are producing byte-identical binaries as before this code. + // But it should be unnecessary. 
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_R15 && ctxt->headtype != Hsolaris) + nopout(p); + if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_R15) { + p->from.type = D_INDIR+D_TLS; + p->from.scale = 0; p->from.index = D_NONE; } - // Convert g() or m() accesses of the form - // op reg, n(reg)(GS*1) - // to - // op reg, n(GS*1) - if(p->to.index == D_FS || p->to.index == D_GS) { - p->to.type = D_INDIR + p->to.index; + if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_R15) { + p->to.type = D_INDIR+D_TLS; + p->to.scale = 0; p->to.index = D_NONE; } - // Convert get_tls access of the form - // op runtime.tlsgm(SB), reg - // to - // NOP - if(ctxt->gmsym != nil && p->from.sym == ctxt->gmsym) { - p->as = ANOP; - p->from.type = D_NONE; - p->to.type = D_NONE; - p->from.sym = nil; - p->to.sym = nil; - } } else { - // Convert TLS reads of the form - // op n(GS), reg - // to - // MOVQ $runtime.tlsgm(SB), reg - // op n(reg)(GS*1), reg - if((p->from.type == D_INDIR+D_FS || p->from.type == D_INDIR + D_GS) && p->to.type >= D_AX && p->to.type <= D_DI) { + // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load. + // The instruction + // MOVQ off(TLS), BX + // becomes the sequence + // MOVQ TLS, BX + // MOVQ off(BX)(TLS*1), BX + // This allows the C compilers to emit references to m and g using the direct off(TLS) form. 
+ if((p->as == AMOVQ || p->as == AMOVL) && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_R15) { q = appendp(ctxt, p); - q->to = p->to; q->as = p->as; - q->from.type = D_INDIR+p->to.type; - q->from.index = p->from.type - D_INDIR; - q->from.scale = 1; - q->from.offset = p->from.offset; - p->as = AMOVQ; - p->from.type = D_EXTERN; - p->from.sym = ctxt->gmsym; + q->from = p->from; + q->from.type = D_INDIR + p->to.type; + q->from.index = D_TLS; + q->from.scale = 2; // TODO: use 1 + q->to = p->to; + p->from.type = D_TLS; + p->from.index = D_NONE; p->from.offset = 0; } } + // TODO: Remove. + if(ctxt->headtype == Hwindows || ctxt->headtype == Hplan9) { + if(p->from.scale == 1 && p->from.index == D_TLS) + p->from.scale = 2; + if(p->to.scale == 1 && p->to.index == D_TLS) + p->to.scale = 2; + } + + if(ctxt->headtype == Hnacl) { + nacladdr(ctxt, p, &p->from); + nacladdr(ctxt, p, &p->to); + } + // Maintain information about code generation mode. if(ctxt->mode == 0) ctxt->mode = 64; @@ -315,9 +319,9 @@ nacladdr(Link *ctxt, Prog *p, Addr *a) ctxt->diag("invalid address: %P", p); return; } - if(a->type == D_INDIR+D_GS) + if(a->type == D_INDIR+D_TLS) a->type = D_INDIR+D_BP; - else if(a->type == D_GS) + else if(a->type == D_TLS) a->type = D_BP; if(D_INDIR <= a->type && a->type <= D_INDIR+D_INDIR) { switch(a->type) { @@ -632,48 +636,24 @@ indir_cx(Link *ctxt, Addr *a) // Returns last new instruction. 
static Prog* load_g_cx(Link *ctxt, Prog *p) -{ - if(ctxt->flag_shared) { - // Load TLS offset with MOVQ $runtime.tlsgm(SB), CX - p->as = AMOVQ; - p->from.type = D_EXTERN; - p->from.sym = ctxt->gmsym; - p->to.type = D_CX; - p = appendp(ctxt, p); - } +{ + Prog *next; + p->as = AMOVQ; - if(ctxt->headtype == Hlinux || ctxt->headtype == Hfreebsd - || ctxt->headtype == Hopenbsd || ctxt->headtype == Hnetbsd - || ctxt->headtype == Hplan9 || ctxt->headtype == Hdragonfly - || ctxt->headtype == Hsolaris) - // ELF uses FS - p->from.type = D_INDIR+D_FS; - else if(ctxt->headtype == Hnacl) { + if(ctxt->arch->ptrsize == 4) p->as = AMOVL; - p->from.type = D_INDIR+D_BP; - } else - p->from.type = D_INDIR+D_GS; - if(ctxt->flag_shared) { - // Add TLS offset stored in CX - p->from.index = p->from.type - D_INDIR; - indir_cx(ctxt, &p->from); - } - p->from.offset = ctxt->tlsoffset+0; + p->from.type = D_INDIR+D_TLS; + p->from.offset = 0; p->to.type = D_CX; - if(ctxt->headtype == Hwindows) { - // movq %gs:0x28, %rcx - // movq (%rcx), %rcx - p->as = AMOVQ; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0x28; - p->to.type = D_CX; + + next = p->link; + progedit(ctxt, p); + while(p->link != next) + p = p->link; + + if(p->from.index == D_TLS) + p->from.scale = 2; - p = appendp(ctxt, p); - p->as = AMOVQ; - indir_cx(ctxt, &p->from); - p->from.offset = 0; - p->to.type = D_CX; - } return p; } diff --git a/src/liblink/obj8.c b/src/liblink/obj8.c index d36db84705..72934c1499 100644 --- a/src/liblink/obj8.c +++ b/src/liblink/obj8.c @@ -91,80 +91,80 @@ settextflag(Prog *p, int f) p->from.scale = f; } +static int +canuselocaltls(Link *ctxt) +{ + switch(ctxt->headtype) { + case Hlinux: + case Hnacl: + case Hplan9: + case Hwindows: + return 0; + } + return 1; +} + static void progedit(Link *ctxt, Prog *p) { - Prog *q; char literal[64]; LSym *s; - - if(p->from.type == D_INDIR+D_GS) - p->from.offset += ctxt->tlsoffset; - if(p->to.type == D_INDIR+D_GS) - p->to.offset += ctxt->tlsoffset; - - 
if(ctxt->headtype == Hwindows) { - // Convert - // op n(GS), reg - // to - // MOVL 0x14(FS), reg - // op n(reg), reg - // The purpose of this patch is to fix some accesses - // to extern register variables (TLS) on Windows, as - // a different method is used to access them. - if(p->from.type == D_INDIR+D_GS - && p->to.type >= D_AX && p->to.type <= D_DI) { - q = appendp(ctxt, p); - q->from = p->from; - q->from.type = D_INDIR + p->to.type; - q->to = p->to; - q->as = p->as; - p->as = AMOVL; - p->from.type = D_INDIR+D_FS; - p->from.offset = 0x14; + Prog *q; + + // See obj6.c for discussion of TLS. + if(canuselocaltls(ctxt)) { + // Reduce TLS initial exec model to TLS local exec model. + // Sequences like + // MOVL TLS, BX + // ... off(BX)(TLS*1) ... + // become + // NOP + // ... off(TLS) ... + if(p->as == AMOVL && p->from.type == D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) { + p->as = ANOP; + p->from.type = D_NONE; + p->to.type = D_NONE; } - } - if(ctxt->headtype == Hlinux || ctxt->headtype == Hnacl) { - // Running binaries under Xen requires using - // MOVL 0(GS), reg - // and then off(reg) instead of saying off(GS) directly - // when the offset is negative. - // In external mode we just produce a reloc. - if(p->from.type == D_INDIR+D_GS && p->from.offset < 0 - && p->to.type >= D_AX && p->to.type <= D_DI) { - if(ctxt->linkmode != LinkExternal) { - q = appendp(ctxt, p); - q->from = p->from; - q->from.type = D_INDIR + p->to.type; - q->to = p->to; - q->as = p->as; - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0; - } else { - // Add signals to relocate. 
- p->from.index = D_GS; - p->from.scale = 1; - } + if(p->from.index == D_TLS && D_INDIR+D_AX <= p->from.type && p->from.type <= D_INDIR+D_DI) { + p->from.type = D_INDIR+D_TLS; + p->from.scale = 0; + p->from.index = D_NONE; } - } - if(ctxt->headtype == Hplan9) { - if(p->from.type == D_INDIR+D_GS - && p->to.type >= D_AX && p->to.type <= D_DI) { - if(ctxt->plan9tos == nil) - ctxt->plan9tos = linklookup(ctxt, "_tos", 0); + if(p->to.index == D_TLS && D_INDIR+D_AX <= p->to.type && p->to.type <= D_INDIR+D_DI) { + p->to.type = D_INDIR+D_TLS; + p->to.scale = 0; + p->to.index = D_NONE; + } + } else { + // As a courtesy to the C compilers, rewrite TLS local exec load as TLS initial exec load. + // The instruction + // MOVL off(TLS), BX + // becomes the sequence + // MOVL TLS, BX + // MOVL off(BX)(TLS*1), BX + // This allows the C compilers to emit references to m and g using the direct off(TLS) form. + if(p->as == AMOVL && p->from.type == D_INDIR+D_TLS && D_AX <= p->to.type && p->to.type <= D_DI) { q = appendp(ctxt, p); + q->as = p->as; q->from = p->from; q->from.type = D_INDIR + p->to.type; + q->from.index = D_TLS; + q->from.scale = 2; // TODO: use 1 q->to = p->to; - q->as = p->as; - p->as = AMOVL; - p->from.type = D_EXTERN; - p->from.sym = ctxt->plan9tos; + p->from.type = D_TLS; + p->from.index = D_NONE; p->from.offset = 0; } } + // TODO: Remove. + if(ctxt->headtype == Hplan9) { + if(p->from.scale == 1 && p->from.index == D_TLS) + p->from.scale = 2; + if(p->to.scale == 1 && p->to.index == D_TLS) + p->to.scale = 2; + } + // Rewrite CALL/JMP/RET to symbol as D_BRANCH. 
switch(p->as) { case ACALL: @@ -435,62 +435,21 @@ addstacksplit(Link *ctxt, LSym *cursym) static Prog* load_g_cx(Link *ctxt, Prog *p) { - switch(ctxt->headtype) { - case Hwindows: - p->as = AMOVL; - p->from.type = D_INDIR+D_FS; - p->from.offset = 0x14; - p->to.type = D_CX; + Prog *next; - p = appendp(ctxt, p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = 0; - p->to.type = D_CX; - break; - - case Hlinux: - case Hnacl: - if(ctxt->linkmode != LinkExternal) { - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = 0; - p->to.type = D_CX; + p->as = AMOVL; + p->from.type = D_INDIR+D_TLS; + p->from.offset = 0; + p->to.type = D_CX; - p = appendp(ctxt, p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = ctxt->tlsoffset + 0; - p->to.type = D_CX; - } else { - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = ctxt->tlsoffset + 0; - p->to.type = D_CX; - p->from.index = D_GS; - p->from.scale = 1; - } - break; - - case Hplan9: - p->as = AMOVL; - p->from.type = D_EXTERN; - p->from.sym = ctxt->plan9tos; - p->to.type = D_CX; - - p = appendp(ctxt, p); - p->as = AMOVL; - p->from.type = D_INDIR+D_CX; - p->from.offset = ctxt->tlsoffset + 0; - p->to.type = D_CX; - break; + next = p->link; + progedit(ctxt, p); + while(p->link != next) + p = p->link; - default: - p->as = AMOVL; - p->from.type = D_INDIR+D_GS; - p->from.offset = ctxt->tlsoffset + 0; - p->to.type = D_CX; - } + if(p->from.index == D_TLS) + p->from.scale = 2; + return p; } diff --git a/src/liblink/objfile.c b/src/liblink/objfile.c index 2b11add3b6..c7700cc25c 100644 --- a/src/liblink/objfile.c +++ b/src/liblink/objfile.c @@ -274,6 +274,7 @@ writesym(Link *ctxt, Biobuf *b, LSym *s) Pcln *pc; Prog *p; Auto *a; + char *name; if(ctxt->debugasm) { Bprint(ctxt->bso, "%s ", s->name); @@ -308,7 +309,10 @@ writesym(Link *ctxt, Biobuf *b, LSym *s) } for(i=0; i<s->nr; i++) { r = &s->r[i]; - Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, 
r->sym->name, (vlong)r->add); + name = ""; + if(r->sym != nil) + name = r->sym->name; + Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, name, (vlong)r->add); } } diff --git a/src/liblink/sym.c b/src/liblink/sym.c index 3990f7200e..29fc036bcb 100644 --- a/src/liblink/sym.c +++ b/src/liblink/sym.c @@ -118,6 +118,7 @@ linknew(LinkArch *arch) sysfatal("unknown goos %s", getgoos()); // Record thread-local storage offset. + // TODO(rsc): Move tlsoffset back into the linker. switch(ctxt->headtype) { default: sysfatal("unknown thread-local storage offset for %s", headstr(ctxt->headtype)); |
