diff options
| author | Joel Sing <joel@sing.id.au> | 2021-08-26 01:33:29 +1000 |
|---|---|---|
| committer | Joel Sing <joel@sing.id.au> | 2021-10-02 17:29:46 +0000 |
| commit | 3bbc82371eb801ce489f77359f0badc8e469c26d (patch) | |
| tree | 55c98b96db5bbda6ff7a40fbec2fd52e2f97d6cf /src/cmd/internal/obj/riscv | |
| parent | a7fe161ccceb330cd1f19cd103a61f8deacbbdc3 (diff) | |
| download | go-3bbc82371eb801ce489f77359f0badc8e469c26d.tar.xz | |
cmd/internal/obj/riscv,cmd/link/internal/riscv64: add call trampolines for riscv64
CALL and JMP on riscv64 are currently implemented as an AUIPC+JALR pair. This means
that every call requires two instructions and makes use of the REG_TMP register,
even when the symbol would be directly reachable via a single JAL instruction.
Add support for call trampolines - CALL and JMP are now implemented as a single JAL
instruction, with the linker generating trampolines in the case where the symbol is
not reachable (more than +/-1MiB from the JAL instruction), is an unknown symbol or
does not yet have an address assigned. Each trampoline contains an AUIPC+JALR pair,
which the relocation is applied to.
Due to the limited reachability of the JAL instruction, combined with the way that
the Go linker currently assigns symbol addresses, there are cases where a call is to
a symbol that has no address currently assigned. In this situation we have to assume
that a trampoline will be required, however we can patch this up during relocation,
potentially calling directly instead. This means that we will end up with trampolines
that are unused. In the case of the Go binary, there are around 3,500 trampolines of
which approximately 2,300 are unused (around 9200 bytes of machine instructions).
Overall, this removes over 72,000 AUIPC instructions from the Go binary.
Change-Id: I2d9ecfb85dfc285c7729a3cd0b3a77b6f6c98be0
Reviewed-on: https://go-review.googlesource.com/c/go/+/345051
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd/internal/obj/riscv')
| -rw-r--r-- | src/cmd/internal/obj/riscv/cpu.go | 12 | ||||
| -rw-r--r-- | src/cmd/internal/obj/riscv/obj.go | 88 |
2 files changed, 52 insertions, 48 deletions
diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go index a258367ae9..ed88f621d9 100644 --- a/src/cmd/internal/obj/riscv/cpu.go +++ b/src/cmd/internal/obj/riscv/cpu.go @@ -260,6 +260,10 @@ const ( // corresponding *obj.Prog uses the temporary register. USES_REG_TMP = 1 << iota + // NEED_CALL_RELOC is set on JAL instructions to indicate that a + // R_RISCV_CALL relocation is needed. + NEED_CALL_RELOC + // NEED_PCREL_ITYPE_RELOC is set on AUIPC instructions to indicate that // it is the first instruction in an AUIPC + I-type pair that needs a // R_RISCV_PCREL_ITYPE relocation. @@ -632,6 +636,10 @@ var unaryDst = map[obj.As]bool{ // Instruction encoding masks. const ( + // JTypeImmMask is a mask including only the immediate portion of + // J-type instructions. + JTypeImmMask = 0xfffff000 + // ITypeImmMask is a mask including only the immediate portion of // I-type instructions. ITypeImmMask = 0xfff00000 @@ -643,8 +651,4 @@ const ( // UTypeImmMask is a mask including only the immediate portion of // U-type instructions. UTypeImmMask = 0xfffff000 - - // UJTypeImmMask is a mask including only the immediate portion of - // UJ-type instructions. - UJTypeImmMask = UTypeImmMask ) diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index f0ea21de97..b346b13577 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -30,41 +30,19 @@ import ( func buildop(ctxt *obj.Link) {} -// jalrToSym replaces p with a set of Progs needed to jump to the Sym in p. -// lr is the link register to use for the JALR. -// p must be a CALL, JMP or RET. -func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog { - if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY { - ctxt.Diag("unexpected Prog in jalrToSym: %v", p) - return p +func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) { + switch p.As { + case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY: + default: + ctxt.Diag("unexpected Prog in jalToSym: %v", p) + return } - // TODO(jsing): Consider using a single JAL instruction and teaching - // the linker to provide trampolines for the case where the destination - // offset is too large. This would potentially reduce instructions for - // the common case, but would require three instructions to go via the - // trampoline. - - to := p.To - - p.As = AAUIPC - p.Mark |= NEED_PCREL_ITYPE_RELOC - p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: to.Offset, Sym: to.Sym}) - p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0} - p.Reg = obj.REG_NONE - p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP} - p = obj.Appendp(p, newprog) - - // Leave Sym only for the CALL reloc in assemble. - p.As = AJALR + p.As = AJAL + p.Mark |= NEED_CALL_RELOC p.From.Type = obj.TYPE_REG p.From.Reg = lr p.Reg = obj.REG_NONE - p.To.Type = obj.TYPE_REG - p.To.Reg = REG_TMP - p.To.Sym = to.Sym - - return p } // progedit is called individually for each *obj.Prog. It normalizes instruction @@ -531,7 +509,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY: switch p.To.Type { case obj.TYPE_MEM: - jalrToSym(ctxt, p, newprog, REG_LR) + jalToSym(ctxt, p, REG_LR) } case obj.AJMP: @@ -539,8 +517,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { case obj.TYPE_MEM: switch p.To.Name { case obj.NAME_EXTERN, obj.NAME_STATIC: - // JMP to symbol. - jalrToSym(ctxt, p, newprog, REG_ZERO) + jalToSym(ctxt, p, REG_ZERO) } } @@ -566,7 +543,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if retJMP != nil { p.As = obj.ARET p.To.Sym = retJMP - p = jalrToSym(ctxt, p, newprog, REG_ZERO) + jalToSym(ctxt, p, REG_ZERO) } else { p.As = AJALR p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO} @@ -640,8 +617,9 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { rescan = true } case AJAL: + // Linker will handle the intersymbol case and trampolines. if p.To.Target() == nil { - panic("intersymbol jumps should be expressed as AUIPC+JALR") + break } offset := p.To.Target().Pc - p.Pc if offset < -(1<<20) || (1<<20) <= offset { @@ -676,7 +654,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // instructions will break everything--don't do it! for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { - case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ, AJAL: + case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ: switch p.To.Type { case obj.TYPE_BRANCH: p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc @@ -684,6 +662,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { panic("unhandled type") } + case AJAL: + // Linker will handle the intersymbol case and trampolines. + if p.To.Target() != nil { + p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc + } + case AAUIPC: if p.From.Type == obj.TYPE_BRANCH { low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc) @@ -802,7 +786,7 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgA if to_more != nil { to_more.To.SetTarget(p) } - p = jalrToSym(ctxt, p, newprog, REG_X5) + jalToSym(ctxt, p, REG_X5) // JMP start p = obj.Appendp(p, newprog) @@ -1187,6 +1171,11 @@ func encodeU(ins *instruction) uint32 { return imm<<12 | rd<<7 | enc.opcode } +// encodeJImmediate encodes an immediate for a J-type RISC-V instruction. +func encodeJImmediate(imm uint32) uint32 { + return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 +} + // encodeJ encodes a J-type RISC-V instruction. func encodeJ(ins *instruction) uint32 { imm := immI(ins.as, ins.imm, 21) @@ -1195,7 +1184,7 @@ func encodeJ(ins *instruction) uint32 { if enc == nil { panic("encodeJ: could not encode instruction") } - return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12 | rd<<7 | enc.opcode + return encodeJImmediate(imm) | rd<<7 | enc.opcode } func encodeRawIns(ins *instruction) uint32 { @@ -1207,6 +1196,16 @@ func encodeRawIns(ins *instruction) uint32 { return uint32(ins.imm) } +func EncodeJImmediate(imm int64) (int64, error) { + if !immIFits(imm, 21) { + return 0, fmt.Errorf("immediate %#x does not fit in 21 bits", imm) + } + if imm&1 != 0 { + return 0, fmt.Errorf("immediate %#x is not a multiple of two", imm) + } + return int64(encodeJImmediate(uint32(imm))), nil +} + func EncodeIImmediate(imm int64) (int64, error) { if !immIFits(imm, 12) { return 0, fmt.Errorf("immediate %#x does not fit in 12 bits", imm) @@ -2035,17 +2034,18 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { for p := cursym.Func().Text; p != nil; p = p.Link { switch p.As { - case AJALR: - if p.To.Sym != nil { - // This is a CALL/JMP. We add a relocation only - // for linker stack checking. No actual - // relocation is needed. + case AJAL: + if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC { rel := obj.Addrel(cursym) rel.Off = int32(p.Pc) rel.Siz = 4 rel.Sym = p.To.Sym rel.Add = p.To.Offset - rel.Type = objabi.R_CALLRISCV + rel.Type = objabi.R_RISCV_CALL + } + case AJALR: + if p.To.Sym != nil { + ctxt.Diag("%v: unexpected AJALR with to symbol", p) } case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD: |
