diff options
| author | Keith Randall <khr@golang.org> | 2025-05-17 15:05:56 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2025-10-06 14:11:41 -0700 |
| commit | 719dfcf8a8478d70360bf3c34c0e920be7b32994 (patch) | |
| tree | d58aaf3289de3bb18901e34b336da46b425f8075 /src/cmd | |
| parent | f3312124c2370c2f64a7f9ad29732ec30209647a (diff) | |
| download | go-719dfcf8a8478d70360bf3c34c0e920be7b32994.tar.xz | |
cmd/compile: redo arm64 LR/FP save and restore
Instead of storing LR (the return address) at 0(SP) and the FP
(parent's frame pointer) at -8(SP), store them at framesize-8(SP)
and framesize-16(SP), respectively.
We push and pop data onto the stack such that we're never accessing
anything below SP.
The prolog/epilog lengths are unchanged (3 insns for a typical prolog,
2 for a typical epilog).
We use 8 bytes more per frame.
Typical prologue:
STP.W (FP, LR), -16(SP)
MOVD SP, FP
SUB $C, SP
Typical epilogue:
ADD $C, SP
LDP.P 16(SP), (FP, LR)
RET
The previous word where we stored LR, at 0(SP), is now unused.
We could repurpose that slot for storing a local variable.
The new prolog and epilog instructions are recognized by libunwind,
so pc-sampling tools like perf should now be accurate. (TODO: except
maybe after the first RET instruction? Have to look into that.)
Update #73753 (fixes, for arm64)
Update #57302 (Quim thinks this will help on that issue)
Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/674615
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/abi-internal.md | 12 | ||||
| -rw-r--r-- | src/cmd/compile/internal/arm64/ggen.go | 10 | ||||
| -rw-r--r-- | src/cmd/compile/internal/arm64/ssa.go | 2 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/pgen.go | 6 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssagen/ssa.go | 3 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 12 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/obj7.go | 314 | ||||
| -rw-r--r-- | src/cmd/link/internal/amd64/obj.go | 19 | ||||
| -rw-r--r-- | src/cmd/link/internal/arm64/obj.go | 23 | ||||
| -rw-r--r-- | src/cmd/link/internal/ld/dwarf.go | 7 | ||||
| -rw-r--r-- | src/cmd/link/internal/ld/lib.go | 4 | ||||
| -rw-r--r-- | src/cmd/link/internal/ld/stackcheck.go | 5 | ||||
| -rw-r--r-- | src/cmd/link/internal/x86/obj.go | 15 |
13 files changed, 175 insertions, 257 deletions
diff --git a/src/cmd/compile/abi-internal.md b/src/cmd/compile/abi-internal.md index eae230dc07..490e1affb7 100644 --- a/src/cmd/compile/abi-internal.md +++ b/src/cmd/compile/abi-internal.md @@ -576,19 +576,19 @@ A function's stack frame, after the frame is created, is laid out as follows: +------------------------------+ + | return PC | + | frame pointer on entry | ← R29 points to | ... locals ... | | ... outgoing arguments ... | - | return PC | ← RSP points to - | frame pointer on entry | + | unused word | ← RSP points to +------------------------------+ ↓ lower addresses The "return PC" is loaded to the link register, R30, as part of the arm64 `CALL` operation. -On entry, a function subtracts from RSP to open its stack frame, and -saves the values of R30 and R29 at the bottom of the frame. -Specifically, R30 is saved at 0(RSP) and R29 is saved at -8(RSP), -after RSP is updated. +On entry, a function pushes R30 (the return address) and R29 +(the caller's frame pointer) onto the bottom of the stack. It then +subtracts a constant from RSP to open its stack frame. A leaf function that does not require any stack space may omit the saved R30 and R29. diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go index 1402746700..6ba56b992e 100644 --- a/src/cmd/compile/internal/arm64/ggen.go +++ b/src/cmd/compile/internal/arm64/ggen.go @@ -11,10 +11,12 @@ import ( ) func padframe(frame int64) int64 { - // arm64 requires that the frame size (not counting saved FP&LR) - // be 16 bytes aligned. If not, pad it. - if frame%16 != 0 { - frame += 16 - (frame % 16) + // arm64 requires frame sizes here that are 8 mod 16. + // With the additional (unused) slot at the bottom of the frame, + // that makes an aligned 16 byte frame. + // Adding a save region for LR+FP does not change the alignment. + if frame != 0 { + frame += (-(frame + 8)) & 15 } return frame } diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 7bc0e536e9..9f79a740c6 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -221,7 +221,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { for i := 0; i < len(args); i++ { a := args[i] - // Offset by size of the saved LR slot. + // Offset by size of the unused slot before start of args. addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize) // Look for double-register operations if we can. if i < len(args)-1 { diff --git a/src/cmd/compile/internal/ssagen/pgen.go b/src/cmd/compile/internal/ssagen/pgen.go index 0a2010363f..f0776172b9 100644 --- a/src/cmd/compile/internal/ssagen/pgen.go +++ b/src/cmd/compile/internal/ssagen/pgen.go @@ -393,10 +393,16 @@ func StackOffset(slot ssa.LocalSlot) int32 { case ir.PAUTO: off = n.FrameOffset() if base.Ctxt.Arch.FixedFrameSize == 0 { + // x86 return address off -= int64(types.PtrSize) } if buildcfg.FramePointerEnabled { + // frame pointer off -= int64(types.PtrSize) + if buildcfg.GOARCH == "arm64" { + // arm64 return address also + off -= int64(types.PtrSize) + } } } return int32(off + slot.Off) diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 1e2159579d..107447f04c 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -7150,6 +7150,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) { // Insert code to zero ambiguously live variables so that the // garbage collector only sees initialized values when it // looks for pointers. + // Note: lo/hi are offsets from varp and will be negative. var lo, hi int64 // Opaque state for backend to use. Current backends use it to @@ -7157,7 +7158,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) { var state uint32 // Iterate through declarations. Autos are sorted in decreasing - // frame offset order. + // frame offset order (least negative to most negative). for _, n := range e.curfn.Dcl { if !n.Needzero() { continue diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 743d09a319..281d705a3e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -51,7 +51,6 @@ type ctxt7 struct { blitrl *obj.Prog elitrl *obj.Prog autosize int32 - extrasize int32 instoffset int64 pc int64 pool struct { @@ -1122,8 +1121,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") } - c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} - p.To.Offset &= 0xffffffff // extrasize is no longer needed + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)} // Process literal pool and allocate initial program counter for each Prog, before // generating branch veneers. @@ -2119,8 +2117,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - // The frame top 8 or 16 bytes are for FP - c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + // The frame top 16 bytes are for LR/FP + c.instoffset = int64(c.autosize) + a.Offset - extrasize return autoclass(c.instoffset) case obj.NAME_PARAM: @@ -2180,8 +2178,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - // The frame top 8 or 16 bytes are for FP - c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) + // The frame top 16 bytes are for LR/FP + c.instoffset = int64(c.autosize) + a.Offset - extrasize case obj.NAME_PARAM: if a.Reg == REGSP { diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index 2583e46354..a697426145 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -36,7 +36,6 @@ import ( "cmd/internal/src" "cmd/internal/sys" "internal/abi" - "internal/buildcfg" "log" "math" ) @@ -472,6 +471,8 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { obj.Nopout(p) } +const extrasize = 16 // space needed in the frame for LR+FP + func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return @@ -521,33 +522,26 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { - // A leaf function with no locals has no frame. + // A leaf function with no locals needs no frame. p.From.Sym.Set(obj.AttrNoFrame, true) } if !p.From.Sym.NoFrame() { // If there is a stack frame at all, it includes - // space to save the LR. + // space for the (now unused) word at [SP:SP+8]. c.autosize += 8 } + // Round up to a multiple of 16. + c.autosize += (-c.autosize) & 15 + if c.autosize != 0 { - extrasize := int32(0) - if c.autosize%16 == 8 { - // Allocate extra 8 bytes on the frame top to save FP - extrasize = 8 - } else if c.autosize&(16-1) == 0 { - // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16 - extrasize = 16 - } else { - c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) - } + // Allocate an extra 16 bytes at the top of the frame + // to save LR+FP. c.autosize += extrasize c.cursym.Func().Locals += extrasize - // low 32 bits for autosize - // high 32 bits for extrasize - p.To.Offset = int64(c.autosize) | int64(extrasize)<<32 + p.To.Offset = int64(c.autosize) } else { // NOFRAME p.To.Offset = 0 @@ -580,120 +574,72 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var prologueEnd *obj.Prog aoffset := c.autosize - if aoffset > 0xf0 { - // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned. - // so the maximum aoffset value is 0xf0. - aoffset = 0xf0 + if aoffset < 16 { + log.Fatalf("aoffset too small %d", aoffset) } - // Frame is non-empty. Make sure to save link register, even if - // it is a leaf function, so that traceback works. q = p - if c.autosize > aoffset { - // Frame size is too large for a MOVD.W instruction. Store the frame pointer - // register and link register before decrementing SP, so if a signal comes - // during the execution of the function prologue, the traceback code will - // not see a half-updated stack frame. - - // SUB $autosize, RSP, R20 - q1 = obj.Appendp(q, c.newprog) - q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = int64(c.autosize) - q1.Reg = REGSP - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REG_R20 - prologueEnd = q1 - - // STP (R29, R30), -8(R20) - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = ASTP - q1.From.Type = obj.TYPE_REGREG - q1.From.Reg = REGFP - q1.From.Offset = REGLINK - q1.To.Type = obj.TYPE_MEM - q1.To.Reg = REG_R20 - q1.To.Offset = -8 + // Store return address and frame pointer at the top of the stack frame. + // STP.W (R29, R30), -16(SP) + q1 = obj.Appendp(q, c.newprog) + q1.Pos = p.Pos + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REG_RSP + q1.To.Offset = -16 + q1.Scond = C_XPRE - // This is not async preemptible, as if we open a frame - // at the current SP, it will clobber the saved LR. - q1 = c.ctxt.StartUnsafePoint(q1, c.newprog) + prologueEnd = q1 - // MOVD R20, RSP - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REG_R20 - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGSP - q1.Spadj = c.autosize + // Update frame pointer + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGFP - q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1) + // Allocate additional frame space. + adj := aoffset - 16 + if adj > 0 { + // SUB $autosize-16, RSP + if adj < 1<<12 { + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = int64(adj) + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGSP + } else { + // Constant too big for atomic subtract. + // Materialize in tmp register first. + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = int64(adj) + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGTMP - if buildcfg.GOOS == "ios" { - // iOS does not support SA_ONSTACK. We will run the signal handler - // on the G stack. If we write below SP, it may be clobbered by - // the signal handler. So we save FP and LR after decrementing SP. - // STP (R29, R30), -8(RSP) q1 = obj.Appendp(q1, c.newprog) q1.Pos = p.Pos - q1.As = ASTP - q1.From.Type = obj.TYPE_REGREG - q1.From.Reg = REGFP - q1.From.Offset = REGLINK - q1.To.Type = obj.TYPE_MEM + q1.As = ASUB + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGTMP + q1.To.Type = obj.TYPE_REG q1.To.Reg = REGSP - q1.To.Offset = -8 } - } else { - // small frame, update SP and save LR in a single MOVD.W instruction. - // So if a signal comes during the execution of the function prologue, - // the traceback code will not see a half-updated stack frame. - // Also, on Linux, in a cgo binary we may get a SIGSETXID signal - // early on before the signal stack is set, as glibc doesn't allow - // us to block SIGSETXID. So it is important that we don't write below - // the SP until the signal stack is set. - // Luckily, all the functions from thread entry to setting the signal - // stack have small frames. - q1 = obj.Appendp(q, c.newprog) - q1.As = AMOVD - q1.Pos = p.Pos - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGLINK - q1.To.Type = obj.TYPE_MEM - q1.Scond = C_XPRE - q1.To.Offset = int64(-aoffset) - q1.To.Reg = REGSP - q1.Spadj = aoffset - - prologueEnd = q1 - - // Frame pointer. - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGFP - q1.To.Type = obj.TYPE_MEM - q1.To.Reg = REGSP - q1.To.Offset = -8 + q1.Spadj = adj } prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = 8 - q1.Reg = REGSP - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGFP - case obj.ARET: nocache(p) if p.From.Type == obj.TYPE_CONST { @@ -707,105 +653,56 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } p.To = obj.Addr{} aoffset := c.autosize - if c.cursym.Func().Text.Mark&LEAF != 0 { - if aoffset != 0 { - // Restore frame pointer. - // ADD $framesize-8, RSP, R29 - p.As = AADD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(c.autosize) - 8 - p.Reg = REGSP - p.To.Type = obj.TYPE_REG - p.To.Reg = REGFP - - // Pop stack frame. - // ADD $framesize, RSP, RSP - p = obj.Appendp(p, c.newprog) - p.As = AADD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(c.autosize) - p.To.Type = obj.TYPE_REG - p.To.Reg = REGSP - p.Spadj = -c.autosize + if aoffset > 0 { + if aoffset < 16 { + log.Fatalf("aoffset too small %d", aoffset) } - } else if aoffset <= 0xF0 { - // small frame, restore LR and update SP in a single MOVD.P instruction. - // There is no correctness issue to use a single LDP for LR and FP, - // but the instructions are not pattern matched with the prologue's - // MOVD.W and MOVD, which may cause performance issue in - // store-forwarding. + adj := aoffset - 16 + if adj > 0 { + if adj < 1<<12 { + // ADD $adj, RSP, RSP + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(adj) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + } else { + // Put frame size in a separate register and + // add it in with a single instruction, + // so we never have a partial frame during + // the epilog. See issue 73259. - // MOVD -8(RSP), R29 - p.As = AMOVD - p.From.Type = obj.TYPE_MEM - p.From.Reg = REGSP - p.From.Offset = -8 - p.To.Type = obj.TYPE_REG - p.To.Reg = REGFP - p = obj.Appendp(p, c.newprog) + // MOVD $adj, REGTMP + p.As = AMOVD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(adj) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGTMP + // ADD REGTMP, RSP, RSP + p = obj.Appendp(p, c.newprog) + p.As = AADD + p.From.Type = obj.TYPE_REG + p.From.Reg = REGTMP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + } + p.Spadj = -adj + } - // MOVD.P offset(RSP), R30 - p.As = AMOVD - p.From.Type = obj.TYPE_MEM - p.Scond = C_XPOST - p.From.Offset = int64(aoffset) - p.From.Reg = REGSP - p.To.Type = obj.TYPE_REG - p.To.Reg = REGLINK - p.Spadj = -aoffset - } else { - // LDP -8(RSP), (R29, R30) + // Pop LR+FP. + // LDP.P 16(RSP), (R29, R30) + if p.As != obj.ARET { + p = obj.Appendp(p, c.newprog) + } p.As = ALDP p.From.Type = obj.TYPE_MEM - p.From.Offset = -8 p.From.Reg = REGSP + p.From.Offset = 16 + p.Scond = C_XPOST p.To.Type = obj.TYPE_REGREG p.To.Reg = REGFP p.To.Offset = REGLINK - - if aoffset < 1<<12 { - // ADD $aoffset, RSP, RSP - q = newprog() - q.As = AADD - q.From.Type = obj.TYPE_CONST - q.From.Offset = int64(aoffset) - q.To.Type = obj.TYPE_REG - q.To.Reg = REGSP - q.Spadj = -aoffset - q.Pos = p.Pos - q.Link = p.Link - p.Link = q - p = q - } else { - // Put frame size in a separate register and - // add it in with a single instruction, - // so we never have a partial frame during - // the epilog. See issue 73259. - - // MOVD $aoffset, REGTMP - q = newprog() - q.As = AMOVD - q.From.Type = obj.TYPE_CONST - q.From.Offset = int64(aoffset) - q.To.Type = obj.TYPE_REG - q.To.Reg = REGTMP - q.Pos = p.Pos - q.Link = p.Link - p.Link = q - p = q - // ADD REGTMP, RSP, RSP - q = newprog() - q.As = AADD - q.From.Type = obj.TYPE_REG - q.From.Reg = REGTMP - q.To.Type = obj.TYPE_REG - q.To.Reg = REGSP - q.Spadj = -aoffset - q.Pos = p.Pos - q.Link = p.Link - p.Link = q - p = q - } + p.Spadj = -16 } // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC', @@ -868,10 +765,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_REG p.From.Reg = REGLINK } else { - /* MOVD (RSP), Rd */ + /* MOVD framesize-8(RSP), Rd */ p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP + p.From.Offset = int64(c.autosize - 8) } } if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { @@ -906,6 +804,12 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Reg = int16(REG_LSL + r + (shift&7)<<5) p.From.Offset = 0 } + if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { + p.Spadj += int32(-p.To.Offset) + } + if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { + p.Spadj += int32(-p.From.Offset) + } } } diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go index 3a6141b909..761496549f 100644 --- a/src/cmd/link/internal/amd64/obj.go +++ b/src/cmd/link/internal/amd64/obj.go @@ -51,15 +51,16 @@ func Init() (*sys.Arch, ld.Arch) { Plan9Magic: uint32(4*26*26 + 7), Plan9_64Bit: true, - Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Gentext: gentext, - Machoreloc1: machoreloc1, - MachorelocSize: 8, - PEreloc1: pereloc1, - TLSIEtoLE: tlsIEtoLE, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Gentext: gentext, + Machoreloc1: machoreloc1, + MachorelocSize: 8, + PEreloc1: pereloc1, + TLSIEtoLE: tlsIEtoLE, + ReturnAddressAtTopOfFrame: true, ELF: ld.ELFArch{ Linuxdynld: "/lib64/ld-linux-x86-64.so.2", diff --git a/src/cmd/link/internal/arm64/obj.go b/src/cmd/link/internal/arm64/obj.go index 3d358155ba..e1e4ade818 100644 --- a/src/cmd/link/internal/arm64/obj.go +++ b/src/cmd/link/internal/arm64/obj.go @@ -47,17 +47,18 @@ func Init() (*sys.Arch, ld.Arch) { Dwarfreglr: dwarfRegLR, TrampLimit: 0x7c00000, // 26-bit signed offset * 4, leave room for PLT etc. - Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Extreloc: extreloc, - Gentext: gentext, - GenSymsLate: gensymlate, - Machoreloc1: machoreloc1, - MachorelocSize: 8, - PEreloc1: pereloc1, - Trampoline: trampoline, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Extreloc: extreloc, + Gentext: gentext, + GenSymsLate: gensymlate, + Machoreloc1: machoreloc1, + MachorelocSize: 8, + PEreloc1: pereloc1, + Trampoline: trampoline, + ReturnAddressAtTopOfFrame: true, ELF: ld.ELFArch{ Androiddynld: "/system/bin/linker64", diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index 0003938ef2..c4d12a5488 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -1544,9 +1544,14 @@ func (d *dwctxt) writeframes(fs loader.Sym) dwarfSecInfo { if pcsp.Value > 0 { // The return address is preserved at (CFA-frame_size) // after a stack frame has been allocated. + off := -spdelta + if thearch.ReturnAddressAtTopOfFrame { + // Except arm64, which has it at the top of frame. + off = -int64(d.arch.PtrSize) + } deltaBuf = append(deltaBuf, dwarf.DW_CFA_offset_extended_sf) deltaBuf = dwarf.AppendUleb128(deltaBuf, uint64(thearch.Dwarfreglr)) - deltaBuf = dwarf.AppendSleb128(deltaBuf, -spdelta/dataAlignmentFactor) + deltaBuf = dwarf.AppendSleb128(deltaBuf, off/dataAlignmentFactor) } else { // The return address is restored into the link register // when a stack frame has been de-allocated. diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index 2c861129b5..5f5ebfc1d9 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -263,6 +263,10 @@ type Arch struct { // optional override for assignAddress AssignAddress func(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64) + // Reports whether the return address is stored at the top (highest address) + // of the stack frame. + ReturnAddressAtTopOfFrame bool + // ELF specific information. ELF ELFArch } diff --git a/src/cmd/link/internal/ld/stackcheck.go b/src/cmd/link/internal/ld/stackcheck.go index 98e7edaeb1..14cd3a2238 100644 --- a/src/cmd/link/internal/ld/stackcheck.go +++ b/src/cmd/link/internal/ld/stackcheck.go @@ -9,7 +9,6 @@ import ( "cmd/internal/objabi" "cmd/link/internal/loader" "fmt" - "internal/buildcfg" "sort" "strings" ) @@ -62,10 +61,6 @@ func (ctxt *Link) doStackCheck() { // that there are at least StackLimit bytes available below SP // when morestack returns. limit := objabi.StackNosplit(*flagRace) - sc.callSize - if buildcfg.GOARCH == "arm64" { - // Need an extra 8 bytes below SP to save FP. - limit -= 8 - } // Compute stack heights without any back-tracking information. // This will almost certainly succeed and we can simply diff --git a/src/cmd/link/internal/x86/obj.go b/src/cmd/link/internal/x86/obj.go index 4336f01ea3..a4885fde8f 100644 --- a/src/cmd/link/internal/x86/obj.go +++ b/src/cmd/link/internal/x86/obj.go @@ -50,13 +50,14 @@ func Init() (*sys.Arch, ld.Arch) { Plan9Magic: uint32(4*11*11 + 7), - Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Gentext: gentext, - Machoreloc1: machoreloc1, - PEreloc1: pereloc1, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Gentext: gentext, + Machoreloc1: machoreloc1, + PEreloc1: pereloc1, + ReturnAddressAtTopOfFrame: true, ELF: ld.ELFArch{ Linuxdynld: "/lib/ld-linux.so.2", |
