diff options
| author | Keith Randall <khr@golang.org> | 2025-10-07 07:58:50 -0700 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-10-07 08:21:14 -0700 |
| commit | c938051dd0b80a5c60572d6807270d06ca685d2e (patch) | |
| tree | c4e69ab40d336c192c7cb7ec0a5f50ed54f97d80 /src/cmd/internal/obj | |
| parent | 64699542031b994ec4fdb6de887a94b69a372f9b (diff) | |
| download | go-c938051dd0b80a5c60572d6807270d06ca685d2e.tar.xz | |
Revert "cmd/compile: redo arm64 LR/FP save and restore"
This reverts commit 719dfcf8a8478d70360bf3c34c0e920be7b32994.
Reason for revert: Causing crashes.
Change-Id: I0b8526dd03d82fa074ce4f97f1789eeac702b3eb
Reviewed-on: https://go-review.googlesource.com/c/go/+/709755
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd/internal/obj')
| -rw-r--r-- | src/cmd/internal/obj/arm64/asm7.go | 12 | ||||
| -rw-r--r-- | src/cmd/internal/obj/arm64/obj7.go | 314 |
2 files changed, 212 insertions, 114 deletions
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 281d705a3e..743d09a319 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -51,6 +51,7 @@ type ctxt7 struct { blitrl *obj.Prog elitrl *obj.Prog autosize int32 + extrasize int32 instoffset int64 pc int64 pool struct { @@ -1121,7 +1122,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") } - c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)} + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} + p.To.Offset &= 0xffffffff // extrasize is no longer needed // Process literal pool and allocate initial program counter for each Prog, before // generating branch veneers. @@ -2117,8 +2119,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - // The frame top 16 bytes are for LR/FP - c.instoffset = int64(c.autosize) + a.Offset - extrasize + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) return autoclass(c.instoffset) case obj.NAME_PARAM: @@ -2178,8 +2180,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - // The frame top 16 bytes are for LR/FP - c.instoffset = int64(c.autosize) + a.Offset - extrasize + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) case obj.NAME_PARAM: if a.Reg == REGSP { diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index a697426145..2583e46354 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -36,6 +36,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "internal/abi" + "internal/buildcfg" "log" "math" ) @@ -471,8 +472,6 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { obj.Nopout(p) } -const extrasize = 16 // space needed in the frame for LR+FP - func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return @@ -522,26 +521,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { - // A leaf function with no locals needs no frame. + // A leaf function with no locals has no frame. p.From.Sym.Set(obj.AttrNoFrame, true) } if !p.From.Sym.NoFrame() { // If there is a stack frame at all, it includes - // space for the (now unused) word at [SP:SP+8]. + // space to save the LR. c.autosize += 8 } - // Round up to a multiple of 16. - c.autosize += (-c.autosize) & 15 - if c.autosize != 0 { - // Allocate an extra 16 bytes at the top of the frame - // to save LR+FP. + extrasize := int32(0) + if c.autosize%16 == 8 { + // Allocate extra 8 bytes on the frame top to save FP + extrasize = 8 + } else if c.autosize&(16-1) == 0 { + // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16 + extrasize = 16 + } else { + c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) + } c.autosize += extrasize c.cursym.Func().Locals += extrasize - p.To.Offset = int64(c.autosize) + // low 32 bits for autosize + // high 32 bits for extrasize + p.To.Offset = int64(c.autosize) | int64(extrasize)<<32 } else { // NOFRAME p.To.Offset = 0 @@ -574,72 +580,120 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var prologueEnd *obj.Prog aoffset := c.autosize - if aoffset < 16 { - log.Fatalf("aoffset too small %d", aoffset) + if aoffset > 0xf0 { + // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned. + // so the maximum aoffset value is 0xf0. + aoffset = 0xf0 } + // Frame is non-empty. Make sure to save link register, even if + // it is a leaf function, so that traceback works. q = p + if c.autosize > aoffset { + // Frame size is too large for a MOVD.W instruction. Store the frame pointer + // register and link register before decrementing SP, so if a signal comes + // during the execution of the function prologue, the traceback code will + // not see a half-updated stack frame. - // Store return address and frame pointer at the top of the stack frame. - // STP.W (R29, R30), -16(SP) - q1 = obj.Appendp(q, c.newprog) - q1.Pos = p.Pos - q1.As = ASTP - q1.From.Type = obj.TYPE_REGREG - q1.From.Reg = REGFP - q1.From.Offset = REGLINK - q1.To.Type = obj.TYPE_MEM - q1.To.Reg = REG_RSP - q1.To.Offset = -16 - q1.Scond = C_XPRE + // SUB $autosize, RSP, R20 + q1 = obj.Appendp(q, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = int64(c.autosize) + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R20 - prologueEnd = q1 + prologueEnd = q1 - // Update frame pointer - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGSP - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGFP + // STP (R29, R30), -8(R20) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REG_R20 + q1.To.Offset = -8 - // Allocate additional frame space. - adj := aoffset - 16 - if adj > 0 { - // SUB $autosize-16, RSP - if adj < 1<<12 { - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = int64(adj) - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGSP - } else { - // Constant too big for atomic subtract. - // Materialize in tmp register first. - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = int64(adj) - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGTMP + // This is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. + q1 = c.ctxt.StartUnsafePoint(q1, c.newprog) + + // MOVD R20, RSP + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REG_R20 + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGSP + q1.Spadj = c.autosize + + q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1) + if buildcfg.GOOS == "ios" { + // iOS does not support SA_ONSTACK. We will run the signal handler + // on the G stack. If we write below SP, it may be clobbered by + // the signal handler. So we save FP and LR after decrementing SP. + // STP (R29, R30), -8(RSP) q1 = obj.Appendp(q1, c.newprog) q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGTMP - q1.To.Type = obj.TYPE_REG + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM q1.To.Reg = REGSP + q1.To.Offset = -8 } - q1.Spadj = adj + } else { + // small frame, update SP and save LR in a single MOVD.W instruction. + // So if a signal comes during the execution of the function prologue, + // the traceback code will not see a half-updated stack frame. + // Also, on Linux, in a cgo binary we may get a SIGSETXID signal + // early on before the signal stack is set, as glibc doesn't allow + // us to block SIGSETXID. So it is important that we don't write below + // the SP until the signal stack is set. + // Luckily, all the functions from thread entry to setting the signal + // stack have small frames. + q1 = obj.Appendp(q, c.newprog) + q1.As = AMOVD + q1.Pos = p.Pos + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.Scond = C_XPRE + q1.To.Offset = int64(-aoffset) + q1.To.Reg = REGSP + q1.Spadj = aoffset + + prologueEnd = q1 + + // Frame pointer. + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGFP + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REGSP + q1.To.Offset = -8 } prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = 8 + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGFP + case obj.ARET: nocache(p) if p.From.Type == obj.TYPE_CONST { @@ -653,56 +707,105 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } p.To = obj.Addr{} aoffset := c.autosize - if aoffset > 0 { - if aoffset < 16 { - log.Fatalf("aoffset too small %d", aoffset) - } - adj := aoffset - 16 - if adj > 0 { - if adj < 1<<12 { - // ADD $adj, RSP, RSP - p.As = AADD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(adj) - p.To.Type = obj.TYPE_REG - p.To.Reg = REGSP - } else { - // Put frame size in a separate register and - // add it in with a single instruction, - // so we never have a partial frame during - // the epilog. See issue 73259. - - // MOVD $adj, REGTMP - p.As = AMOVD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(adj) - p.To.Type = obj.TYPE_REG - p.To.Reg = REGTMP - // ADD REGTMP, RSP, RSP - p = obj.Appendp(p, c.newprog) - p.As = AADD - p.From.Type = obj.TYPE_REG - p.From.Reg = REGTMP - p.To.Type = obj.TYPE_REG - p.To.Reg = REGSP - } - p.Spadj = -adj - } + if c.cursym.Func().Text.Mark&LEAF != 0 { + if aoffset != 0 { + // Restore frame pointer. + // ADD $framesize-8, RSP, R29 + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(c.autosize) - 8 + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP - // Pop LR+FP. - // LDP.P 16(RSP), (R29, R30) - if p.As != obj.ARET { + // Pop stack frame. + // ADD $framesize, RSP, RSP p = obj.Appendp(p, c.newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(c.autosize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -c.autosize } - p.As = ALDP + } else if aoffset <= 0xF0 { + // small frame, restore LR and update SP in a single MOVD.P instruction. + // There is no correctness issue to use a single LDP for LR and FP, + // but the instructions are not pattern matched with the prologue's + // MOVD.W and MOVD, which may cause performance issue in + // store-forwarding. + + // MOVD -8(RSP), R29 + p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP - p.From.Offset = 16 + p.From.Offset = -8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + p = obj.Appendp(p, c.newprog) + + // MOVD.P offset(RSP), R30 + p.As = AMOVD + p.From.Type = obj.TYPE_MEM p.Scond = C_XPOST + p.From.Offset = int64(aoffset) + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + p.Spadj = -aoffset + } else { + // LDP -8(RSP), (R29, R30) + p.As = ALDP + p.From.Type = obj.TYPE_MEM + p.From.Offset = -8 + p.From.Reg = REGSP p.To.Type = obj.TYPE_REGREG p.To.Reg = REGFP p.To.Offset = REGLINK - p.Spadj = -16 + + if aoffset < 1<<12 { + // ADD $aoffset, RSP, RSP + q = newprog() + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(aoffset) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -aoffset + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } else { + // Put frame size in a separate register and + // add it in with a single instruction, + // so we never have a partial frame during + // the epilog. See issue 73259. + + // MOVD $aoffset, REGTMP + q = newprog() + q.As = AMOVD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(aoffset) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGTMP + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + // ADD REGTMP, RSP, RSP + q = newprog() + q.As = AADD + q.From.Type = obj.TYPE_REG + q.From.Reg = REGTMP + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -aoffset + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } } // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC', @@ -765,11 +868,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_REG p.From.Reg = REGLINK } else { - /* MOVD framesize-8(RSP), Rd */ + /* MOVD (RSP), Rd */ p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP - p.From.Offset = int64(c.autosize - 8) } } if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { @@ -804,12 +906,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Reg = int16(REG_LSL + r + (shift&7)<<5) p.From.Offset = 0 } - if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { - p.Spadj += int32(-p.To.Offset) - } - if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { - p.Spadj += int32(-p.From.Offset) - } } } |
