about | summary | refs | log | tree | commit | diff
path: root/src/cmd/link
diff options
context:
space:
mode:
author: Joel Sing <joel@sing.id.au> 2021-08-26 01:33:29 +1000
committer: Joel Sing <joel@sing.id.au> 2021-10-02 17:29:46 +0000
commit: 3bbc82371eb801ce489f77359f0badc8e469c26d (patch)
tree: 55c98b96db5bbda6ff7a40fbec2fd52e2f97d6cf /src/cmd/link
parent: a7fe161ccceb330cd1f19cd103a61f8deacbbdc3 (diff)
download: go-3bbc82371eb801ce489f77359f0badc8e469c26d.tar.xz
cmd/internal/obj/riscv,cmd/link/internal/riscv64: add call trampolines for riscv64

CALL and JMP on riscv64 are currently implemented as an AUIPC+JALR pair. This
means that every call requires two instructions and makes use of the REG_TMP
register, even when the symbol would be directly reachable via a single JAL
instruction.

Add support for call trampolines - CALL and JMP are now implemented as a single
JAL instruction, with the linker generating trampolines in the case where the
symbol is not reachable (more than +/-1MiB from the JAL instruction), is an
unknown symbol or does not yet have an address assigned. Each trampoline
contains an AUIPC+JALR pair, which the relocation is applied to.

Due to the limited reachability of the JAL instruction, combined with the way
that the Go linker currently assigns symbol addresses, there are cases where a
call is to a symbol that has no address currently assigned. In this situation
we have to assume that a trampoline will be required, however we can patch this
up during relocation, potentially calling directly instead. This means that we
will end up with trampolines that are unused. In the case of the Go binary,
there are around 3,500 trampolines of which approximately 2,300 are unused
(around 9200 bytes of machine instructions).

Overall, this removes over 72,000 AUIPC instructions from the Go binary.

Change-Id: I2d9ecfb85dfc285c7729a3cd0b3a77b6f6c98be0
Reviewed-on: https://go-review.googlesource.com/c/go/+/345051
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>

Diffstat (limited to 'src/cmd/link')
-rw-r--r-- src/cmd/link/internal/ld/data.go 15
-rw-r--r-- src/cmd/link/internal/ld/pcln.go 7
-rw-r--r-- src/cmd/link/internal/riscv64/asm.go 141
-rw-r--r-- src/cmd/link/internal/riscv64/obj.go 14
4 files changed, 150 insertions, 27 deletions
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index 21169f66ef..8de0e0df1a 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -92,10 +92,10 @@ func maxSizeTrampolines(ctxt *Link, ldr *loader.Loader, s loader.Sym, isTramp bo
panic("unreachable")
}
-// detect too-far jumps in function s, and add trampolines if necessary
-// ARM, PPC64 & PPC64LE support trampoline insertion for internal and external linking
-// On PPC64 & PPC64LE the text sections might be split but will still insert trampolines
-// where necessary.
+// Detect too-far jumps in function s, and add trampolines if necessary.
+// ARM, PPC64, PPC64LE and RISCV64 support trampoline insertion for internal
+// and external linking. On PPC64 and PPC64LE the text sections might be split
+// but will still insert trampolines where necessary.
func trampoline(ctxt *Link, s loader.Sym) {
if thearch.Trampoline == nil {
return // no need or no support of trampolines on this arch
@@ -113,7 +113,11 @@ func trampoline(ctxt *Link, s loader.Sym) {
if !ldr.AttrReachable(rs) || ldr.SymType(rs) == sym.Sxxx {
continue // something is wrong. skip it here and we'll emit a better error later
}
- if ldr.SymValue(rs) == 0 && (ldr.SymType(rs) != sym.SDYNIMPORT && ldr.SymType(rs) != sym.SUNDEFEXT) {
+
+ // RISC-V is only able to reach +/-1MiB via a JAL instruction,
+ // which we can readily exceed in the same package. As such, we
+ // need to generate trampolines when the address is unknown.
+ if ldr.SymValue(rs) == 0 && !ctxt.Target.IsRISCV64() && ldr.SymType(rs) != sym.SDYNIMPORT && ldr.SymType(rs) != sym.SUNDEFEXT {
if ldr.SymPkg(s) != "" && ldr.SymPkg(rs) == ldr.SymPkg(s) {
// Symbols in the same package are laid out together.
// Except that if SymPkg(s) == "", it is a host object symbol
@@ -124,7 +128,6 @@ func trampoline(ctxt *Link, s loader.Sym) {
continue // runtime packages are laid out together
}
}
-
thearch.Trampoline(ctxt, ldr, ri, rs, s)
}
}
diff --git a/src/cmd/link/internal/ld/pcln.go b/src/cmd/link/internal/ld/pcln.go
index 5f4724970c..39dd4b916e 100644
--- a/src/cmd/link/internal/ld/pcln.go
+++ b/src/cmd/link/internal/ld/pcln.go
@@ -143,13 +143,8 @@ func computeDeferReturn(ctxt *Link, deferReturnSym, s loader.Sym) uint32 {
switch target.Arch.Family {
case sys.AMD64, sys.I386:
deferreturn--
- case sys.PPC64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64:
+ case sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64:
// no change
- case sys.RISCV64:
- // TODO(jsing): The JALR instruction is marked with
- // R_CALLRISCV, whereas the actual reloc is currently
- // one instruction earlier starting with the AUIPC.
- deferreturn -= 4
case sys.S390X:
deferreturn -= 2
default:
diff --git a/src/cmd/link/internal/riscv64/asm.go b/src/cmd/link/internal/riscv64/asm.go
index ef941e52e9..cb53a605d7 100644
--- a/src/cmd/link/internal/riscv64/asm.go
+++ b/src/cmd/link/internal/riscv64/asm.go
@@ -96,10 +96,10 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
}
out.Write64(uint64(r.Xadd))
- case objabi.R_CALLRISCV:
- // Call relocations are currently handled via R_RISCV_PCREL_ITYPE.
- // TODO(jsing): Consider generating elf.R_RISCV_CALL instead of a
- // HI20/LO12_I pair.
+ case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
+ out.Write64(uint64(sectoff))
+ out.Write64(uint64(elf.R_RISCV_JAL) | uint64(elfsym)<<32)
+ out.Write64(uint64(r.Xadd))
case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
// Find the text symbol for the AUIPC instruction targeted
@@ -156,10 +156,38 @@ func machoreloc1(*sys.Arch, *ld.OutBuf, *loader.Loader, loader.Sym, loader.ExtRe
}
func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) (o int64, nExtReloc int, ok bool) {
+ rs := r.Sym()
+ pc := ldr.SymValue(s) + int64(r.Off())
+
+ // If the call points to a trampoline, see if we can reach the symbol
+ // directly. This situation can occur when the relocation symbol is
+ // not assigned an address until after the trampolines are generated.
+ if r.Type() == objabi.R_RISCV_CALL_TRAMP {
+ relocs := ldr.Relocs(rs)
+ if relocs.Count() != 1 {
+ ldr.Errorf(s, "trampoline %v has %d relocations", ldr.SymName(rs), relocs.Count())
+ }
+ tr := relocs.At(0)
+ if tr.Type() != objabi.R_RISCV_PCREL_ITYPE {
+ ldr.Errorf(s, "trampoline %v has unexpected relocation %v", ldr.SymName(rs), tr.Type())
+ }
+ trs := tr.Sym()
+ if ldr.SymValue(trs) != 0 && ldr.SymType(trs) != sym.SDYNIMPORT && ldr.SymType(trs) != sym.SUNDEFEXT {
+ trsOff := ldr.SymValue(trs) + tr.Add() - pc
+ if trsOff >= -(1<<20) && trsOff < (1<<20) {
+ r.SetType(objabi.R_RISCV_CALL)
+ r.SetSym(trs)
+ r.SetAdd(tr.Add())
+ rs = trs
+ }
+ }
+
+ }
+
if target.IsExternal() {
switch r.Type() {
- case objabi.R_CALLRISCV:
- return val, 0, true
+ case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
+ return val, 1, true
case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
return val, 2, true
@@ -168,11 +196,19 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
return val, 0, false
}
- rs := r.Sym()
+ off := ldr.SymValue(rs) + r.Add() - pc
switch r.Type() {
- case objabi.R_CALLRISCV:
- // Nothing to do.
+ case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
+ // Generate instruction immediates.
+ imm, err := riscv.EncodeJImmediate(off)
+ if err != nil {
+ ldr.Errorf(s, "cannot encode R_RISCV_CALL relocation offset for %s: %v", ldr.SymName(rs), err)
+ }
+ immMask := int64(riscv.JTypeImmMask)
+
+ val = (val &^ immMask) | int64(imm)
+
return val, 0, true
case objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
@@ -186,9 +222,6 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
return ebreakIns<<32 | ebreakIns, 0, true
case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE:
- pc := ldr.SymValue(s) + int64(r.Off())
- off := ldr.SymValue(rs) + r.Add() - pc
-
// Generate AUIPC and second instruction immediates.
low, high, err := riscv.Split32BitImmediate(off)
if err != nil {
@@ -237,8 +270,92 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant
func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) {
switch r.Type() {
+ case objabi.R_RISCV_CALL, objabi.R_RISCV_CALL_TRAMP:
+ return ld.ExtrelocSimple(ldr, r), true
+
case objabi.R_RISCV_PCREL_ITYPE, objabi.R_RISCV_PCREL_STYPE, objabi.R_RISCV_TLS_IE_ITYPE, objabi.R_RISCV_TLS_IE_STYPE:
return ld.ExtrelocViaOuterSym(ldr, r, s), true
}
return loader.ExtReloc{}, false
}
+
+func trampoline(ctxt *ld.Link, ldr *loader.Loader, ri int, rs, s loader.Sym) {
+ relocs := ldr.Relocs(s)
+ r := relocs.At(ri)
+
+ switch r.Type() {
+ case objabi.R_RISCV_CALL:
+ pc := ldr.SymValue(s) + int64(r.Off())
+ off := ldr.SymValue(rs) + r.Add() - pc
+
+ // Relocation symbol has an address and is directly reachable,
+ // therefore there is no need for a trampoline.
+ if ldr.SymValue(rs) != 0 && off >= -(1<<20) && off < (1<<20) && (*ld.FlagDebugTramp <= 1 || ldr.SymPkg(s) == ldr.SymPkg(rs)) {
+ break
+ }
+
+ // Relocation symbol is too far for a direct call or has not
+ // yet been given an address. See if an existing trampoline is
+ // reachable and if so, reuse it. Otherwise we need to create
+ // a new trampoline.
+ var tramp loader.Sym
+ for i := 0; ; i++ {
+ oName := ldr.SymName(rs)
+ name := fmt.Sprintf("%s-tramp%d", oName, i)
+ if r.Add() != 0 {
+ name = fmt.Sprintf("%s%+x-tramp%d", oName, r.Add(), i)
+ }
+ tramp = ldr.LookupOrCreateSym(name, int(ldr.SymVersion(rs)))
+ ldr.SetAttrReachable(tramp, true)
+ if ldr.SymType(tramp) == sym.SDYNIMPORT {
+ // Do not reuse trampoline defined in other module.
+ continue
+ }
+ if oName == "runtime.deferreturn" {
+ ldr.SetIsDeferReturnTramp(tramp, true)
+ }
+ if ldr.SymValue(tramp) == 0 {
+ // Either trampoline does not exist or we found one
+ // that does not have an address assigned and will be
+ // laid down immediately after the current function.
+ break
+ }
+
+ trampOff := ldr.SymValue(tramp) - (ldr.SymValue(s) + int64(r.Off()))
+ if trampOff >= -(1<<20) && trampOff < (1<<20) {
+ // An existing trampoline that is reachable.
+ break
+ }
+ }
+ if ldr.SymType(tramp) == 0 {
+ trampb := ldr.MakeSymbolUpdater(tramp)
+ ctxt.AddTramp(trampb)
+ genCallTramp(ctxt.Arch, ctxt.LinkMode, ldr, trampb, rs, int64(r.Add()))
+ }
+ sb := ldr.MakeSymbolUpdater(s)
+ if ldr.SymValue(rs) == 0 {
+ // In this case the target symbol has not yet been assigned an
+ // address, so we have to assume a trampoline is required. Mark
+ // this as a call via a trampoline so that we can potentially
+ // switch to a direct call during relocation.
+ sb.SetRelocType(ri, objabi.R_RISCV_CALL_TRAMP)
+ }
+ relocs := sb.Relocs()
+ r := relocs.At(ri)
+ r.SetSym(tramp)
+ r.SetAdd(0)
+
+ default:
+ ctxt.Errorf(s, "trampoline called with non-jump reloc: %d (%s)", r.Type(), sym.RelocName(ctxt.Arch, r.Type()))
+ }
+}
+
+func genCallTramp(arch *sys.Arch, linkmode ld.LinkMode, ldr *loader.Loader, tramp *loader.SymbolBuilder, target loader.Sym, offset int64) {
+ tramp.AddUint32(arch, 0x00000f97) // AUIPC $0, X31
+ tramp.AddUint32(arch, 0x000f8067) // JALR X0, (X31)
+
+ r, _ := tramp.AddRel(objabi.R_RISCV_PCREL_ITYPE)
+ r.SetSiz(8)
+ r.SetSym(target)
+ r.SetAdd(offset)
+}
diff --git a/src/cmd/link/internal/riscv64/obj.go b/src/cmd/link/internal/riscv64/obj.go
index 917324d922..557e8932c9 100644
--- a/src/cmd/link/internal/riscv64/obj.go
+++ b/src/cmd/link/internal/riscv64/obj.go
@@ -27,9 +27,17 @@ func Init() (*sys.Arch, ld.Arch) {
Elfreloc1: elfreloc1,
ElfrelocSize: 24,
Elfsetupplt: elfsetupplt,
- Gentext: gentext,
- GenSymsLate: genSymsLate,
- Machoreloc1: machoreloc1,
+
+ // TrampLimit is set such that we always run the trampoline
+ // generation code. This is necessary since calls to external
+ // symbols require the use of trampolines, regardless of the
+ // text size.
+ TrampLimit: 1,
+ Trampoline: trampoline,
+
+ Gentext: gentext,
+ GenSymsLate: genSymsLate,
+ Machoreloc1: machoreloc1,
Linuxdynld: "/lib/ld.so.1",