aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj
diff options
context:
space:
mode:
authorKeith Randall <khr@golang.org>2025-10-07 07:58:50 -0700
committerGopher Robot <gobot@golang.org>2025-10-07 08:21:14 -0700
commitc938051dd0b80a5c60572d6807270d06ca685d2e (patch)
treec4e69ab40d336c192c7cb7ec0a5f50ed54f97d80 /src/cmd/internal/obj
parent64699542031b994ec4fdb6de887a94b69a372f9b (diff)
downloadgo-c938051dd0b80a5c60572d6807270d06ca685d2e.tar.xz
Revert "cmd/compile: redo arm64 LR/FP save and restore"
This reverts commit 719dfcf8a8478d70360bf3c34c0e920be7b32994. Reason for revert: Causing crashes. Change-Id: I0b8526dd03d82fa074ce4f97f1789eeac702b3eb Reviewed-on: https://go-review.googlesource.com/c/go/+/709755 Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com>
Diffstat (limited to 'src/cmd/internal/obj')
-rw-r--r--src/cmd/internal/obj/arm64/asm7.go12
-rw-r--r--src/cmd/internal/obj/arm64/obj7.go314
2 files changed, 212 insertions, 114 deletions
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 281d705a3e..743d09a319 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -51,6 +51,7 @@ type ctxt7 struct {
blitrl *obj.Prog
elitrl *obj.Prog
autosize int32
+ extrasize int32
instoffset int64
pc int64
pool struct {
@@ -1121,7 +1122,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
}
- c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)}
+ c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
+ p.To.Offset &= 0xffffffff // extrasize is no longer needed
// Process literal pool and allocate initial program counter for each Prog, before
// generating branch veneers.
@@ -2117,8 +2119,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
- // The frame top 16 bytes are for LR/FP
- c.instoffset = int64(c.autosize) + a.Offset - extrasize
+ // The frame top 8 or 16 bytes are for FP
+ c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
return autoclass(c.instoffset)
case obj.NAME_PARAM:
@@ -2178,8 +2180,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
- // The frame top 16 bytes are for LR/FP
- c.instoffset = int64(c.autosize) + a.Offset - extrasize
+ // The frame top 8 or 16 bytes are for FP
+ c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
case obj.NAME_PARAM:
if a.Reg == REGSP {
diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go
index a697426145..2583e46354 100644
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -36,6 +36,7 @@ import (
"cmd/internal/src"
"cmd/internal/sys"
"internal/abi"
+ "internal/buildcfg"
"log"
"math"
)
@@ -471,8 +472,6 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) {
obj.Nopout(p)
}
-const extrasize = 16 // space needed in the frame for LR+FP
-
func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
return
@@ -522,26 +521,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
c.autosize = int32(textstksiz)
if p.Mark&LEAF != 0 && c.autosize == 0 {
- // A leaf function with no locals needs no frame.
+ // A leaf function with no locals has no frame.
p.From.Sym.Set(obj.AttrNoFrame, true)
}
if !p.From.Sym.NoFrame() {
// If there is a stack frame at all, it includes
- // space for the (now unused) word at [SP:SP+8].
+ // space to save the LR.
c.autosize += 8
}
- // Round up to a multiple of 16.
- c.autosize += (-c.autosize) & 15
-
if c.autosize != 0 {
- // Allocate an extra 16 bytes at the top of the frame
- // to save LR+FP.
+ extrasize := int32(0)
+ if c.autosize%16 == 8 {
+ // Allocate extra 8 bytes on the frame top to save FP
+ extrasize = 8
+ } else if c.autosize&(16-1) == 0 {
+ // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16
+ extrasize = 16
+ } else {
+ c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8)
+ }
c.autosize += extrasize
c.cursym.Func().Locals += extrasize
- p.To.Offset = int64(c.autosize)
+ // low 32 bits for autosize
+ // high 32 bits for extrasize
+ p.To.Offset = int64(c.autosize) | int64(extrasize)<<32
} else {
// NOFRAME
p.To.Offset = 0
@@ -574,72 +580,120 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
var prologueEnd *obj.Prog
aoffset := c.autosize
- if aoffset < 16 {
- log.Fatalf("aoffset too small %d", aoffset)
+ if aoffset > 0xf0 {
+ // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
+ // so the maximum aoffset value is 0xf0.
+ aoffset = 0xf0
}
+ // Frame is non-empty. Make sure to save link register, even if
+ // it is a leaf function, so that traceback works.
q = p
+ if c.autosize > aoffset {
+ // Frame size is too large for a MOVD.W instruction. Store the frame pointer
+ // register and link register before decrementing SP, so if a signal comes
+ // during the execution of the function prologue, the traceback code will
+ // not see a half-updated stack frame.
- // Store return address and frame pointer at the top of the stack frame.
- // STP.W (R29, R30), -16(SP)
- q1 = obj.Appendp(q, c.newprog)
- q1.Pos = p.Pos
- q1.As = ASTP
- q1.From.Type = obj.TYPE_REGREG
- q1.From.Reg = REGFP
- q1.From.Offset = REGLINK
- q1.To.Type = obj.TYPE_MEM
- q1.To.Reg = REG_RSP
- q1.To.Offset = -16
- q1.Scond = C_XPRE
+ // SUB $autosize, RSP, R20
+ q1 = obj.Appendp(q, c.newprog)
+ q1.Pos = p.Pos
+ q1.As = ASUB
+ q1.From.Type = obj.TYPE_CONST
+ q1.From.Offset = int64(c.autosize)
+ q1.Reg = REGSP
+ q1.To.Type = obj.TYPE_REG
+ q1.To.Reg = REG_R20
- prologueEnd = q1
+ prologueEnd = q1
- // Update frame pointer
- q1 = obj.Appendp(q1, c.newprog)
- q1.Pos = p.Pos
- q1.As = AMOVD
- q1.From.Type = obj.TYPE_REG
- q1.From.Reg = REGSP
- q1.To.Type = obj.TYPE_REG
- q1.To.Reg = REGFP
+ // STP (R29, R30), -8(R20)
+ q1 = obj.Appendp(q1, c.newprog)
+ q1.Pos = p.Pos
+ q1.As = ASTP
+ q1.From.Type = obj.TYPE_REGREG
+ q1.From.Reg = REGFP
+ q1.From.Offset = REGLINK
+ q1.To.Type = obj.TYPE_MEM
+ q1.To.Reg = REG_R20
+ q1.To.Offset = -8
- // Allocate additional frame space.
- adj := aoffset - 16
- if adj > 0 {
- // SUB $autosize-16, RSP
- if adj < 1<<12 {
- q1 = obj.Appendp(q1, c.newprog)
- q1.Pos = p.Pos
- q1.As = ASUB
- q1.From.Type = obj.TYPE_CONST
- q1.From.Offset = int64(adj)
- q1.To.Type = obj.TYPE_REG
- q1.To.Reg = REGSP
- } else {
- // Constant too big for atomic subtract.
- // Materialize in tmp register first.
- q1 = obj.Appendp(q1, c.newprog)
- q1.Pos = p.Pos
- q1.As = AMOVD
- q1.From.Type = obj.TYPE_CONST
- q1.From.Offset = int64(adj)
- q1.To.Type = obj.TYPE_REG
- q1.To.Reg = REGTMP
+ // This is not async preemptible, as if we open a frame
+ // at the current SP, it will clobber the saved LR.
+ q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
+
+ // MOVD R20, RSP
+ q1 = obj.Appendp(q1, c.newprog)
+ q1.Pos = p.Pos
+ q1.As = AMOVD
+ q1.From.Type = obj.TYPE_REG
+ q1.From.Reg = REG_R20
+ q1.To.Type = obj.TYPE_REG
+ q1.To.Reg = REGSP
+ q1.Spadj = c.autosize
+
+ q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
+ if buildcfg.GOOS == "ios" {
+ // iOS does not support SA_ONSTACK. We will run the signal handler
+ // on the G stack. If we write below SP, it may be clobbered by
+ // the signal handler. So we save FP and LR after decrementing SP.
+ // STP (R29, R30), -8(RSP)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
- q1.As = ASUB
- q1.From.Type = obj.TYPE_REG
- q1.From.Reg = REGTMP
- q1.To.Type = obj.TYPE_REG
+ q1.As = ASTP
+ q1.From.Type = obj.TYPE_REGREG
+ q1.From.Reg = REGFP
+ q1.From.Offset = REGLINK
+ q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
+ q1.To.Offset = -8
}
- q1.Spadj = adj
+ } else {
+ // small frame, update SP and save LR in a single MOVD.W instruction.
+ // So if a signal comes during the execution of the function prologue,
+ // the traceback code will not see a half-updated stack frame.
+ // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
+ // early on before the signal stack is set, as glibc doesn't allow
+ // us to block SIGSETXID. So it is important that we don't write below
+ // the SP until the signal stack is set.
+ // Luckily, all the functions from thread entry to setting the signal
+ // stack have small frames.
+ q1 = obj.Appendp(q, c.newprog)
+ q1.As = AMOVD
+ q1.Pos = p.Pos
+ q1.From.Type = obj.TYPE_REG
+ q1.From.Reg = REGLINK
+ q1.To.Type = obj.TYPE_MEM
+ q1.Scond = C_XPRE
+ q1.To.Offset = int64(-aoffset)
+ q1.To.Reg = REGSP
+ q1.Spadj = aoffset
+
+ prologueEnd = q1
+
+ // Frame pointer.
+ q1 = obj.Appendp(q1, c.newprog)
+ q1.Pos = p.Pos
+ q1.As = AMOVD
+ q1.From.Type = obj.TYPE_REG
+ q1.From.Reg = REGFP
+ q1.To.Type = obj.TYPE_MEM
+ q1.To.Reg = REGSP
+ q1.To.Offset = -8
}
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
+ q1 = obj.Appendp(q1, c.newprog)
+ q1.Pos = p.Pos
+ q1.As = ASUB
+ q1.From.Type = obj.TYPE_CONST
+ q1.From.Offset = 8
+ q1.Reg = REGSP
+ q1.To.Type = obj.TYPE_REG
+ q1.To.Reg = REGFP
+
case obj.ARET:
nocache(p)
if p.From.Type == obj.TYPE_CONST {
@@ -653,56 +707,105 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
}
p.To = obj.Addr{}
aoffset := c.autosize
- if aoffset > 0 {
- if aoffset < 16 {
- log.Fatalf("aoffset too small %d", aoffset)
- }
- adj := aoffset - 16
- if adj > 0 {
- if adj < 1<<12 {
- // ADD $adj, RSP, RSP
- p.As = AADD
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = int64(adj)
- p.To.Type = obj.TYPE_REG
- p.To.Reg = REGSP
- } else {
- // Put frame size in a separate register and
- // add it in with a single instruction,
- // so we never have a partial frame during
- // the epilog. See issue 73259.
-
- // MOVD $adj, REGTMP
- p.As = AMOVD
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = int64(adj)
- p.To.Type = obj.TYPE_REG
- p.To.Reg = REGTMP
- // ADD REGTMP, RSP, RSP
- p = obj.Appendp(p, c.newprog)
- p.As = AADD
- p.From.Type = obj.TYPE_REG
- p.From.Reg = REGTMP
- p.To.Type = obj.TYPE_REG
- p.To.Reg = REGSP
- }
- p.Spadj = -adj
- }
+ if c.cursym.Func().Text.Mark&LEAF != 0 {
+ if aoffset != 0 {
+ // Restore frame pointer.
+ // ADD $framesize-8, RSP, R29
+ p.As = AADD
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = int64(c.autosize) - 8
+ p.Reg = REGSP
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REGFP
- // Pop LR+FP.
- // LDP.P 16(RSP), (R29, R30)
- if p.As != obj.ARET {
+ // Pop stack frame.
+ // ADD $framesize, RSP, RSP
p = obj.Appendp(p, c.newprog)
+ p.As = AADD
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = int64(c.autosize)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REGSP
+ p.Spadj = -c.autosize
}
- p.As = ALDP
+ } else if aoffset <= 0xF0 {
+ // small frame, restore LR and update SP in a single MOVD.P instruction.
+ // There is no correctness issue to use a single LDP for LR and FP,
+ // but the instructions are not pattern matched with the prologue's
+ // MOVD.W and MOVD, which may cause performance issue in
+ // store-forwarding.
+
+ // MOVD -8(RSP), R29
+ p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
- p.From.Offset = 16
+ p.From.Offset = -8
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REGFP
+ p = obj.Appendp(p, c.newprog)
+
+ // MOVD.P offset(RSP), R30
+ p.As = AMOVD
+ p.From.Type = obj.TYPE_MEM
p.Scond = C_XPOST
+ p.From.Offset = int64(aoffset)
+ p.From.Reg = REGSP
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = REGLINK
+ p.Spadj = -aoffset
+ } else {
+ // LDP -8(RSP), (R29, R30)
+ p.As = ALDP
+ p.From.Type = obj.TYPE_MEM
+ p.From.Offset = -8
+ p.From.Reg = REGSP
p.To.Type = obj.TYPE_REGREG
p.To.Reg = REGFP
p.To.Offset = REGLINK
- p.Spadj = -16
+
+ if aoffset < 1<<12 {
+ // ADD $aoffset, RSP, RSP
+ q = newprog()
+ q.As = AADD
+ q.From.Type = obj.TYPE_CONST
+ q.From.Offset = int64(aoffset)
+ q.To.Type = obj.TYPE_REG
+ q.To.Reg = REGSP
+ q.Spadj = -aoffset
+ q.Pos = p.Pos
+ q.Link = p.Link
+ p.Link = q
+ p = q
+ } else {
+ // Put frame size in a separate register and
+ // add it in with a single instruction,
+ // so we never have a partial frame during
+ // the epilog. See issue 73259.
+
+ // MOVD $aoffset, REGTMP
+ q = newprog()
+ q.As = AMOVD
+ q.From.Type = obj.TYPE_CONST
+ q.From.Offset = int64(aoffset)
+ q.To.Type = obj.TYPE_REG
+ q.To.Reg = REGTMP
+ q.Pos = p.Pos
+ q.Link = p.Link
+ p.Link = q
+ p = q
+ // ADD REGTMP, RSP, RSP
+ q = newprog()
+ q.As = AADD
+ q.From.Type = obj.TYPE_REG
+ q.From.Reg = REGTMP
+ q.To.Type = obj.TYPE_REG
+ q.To.Reg = REGSP
+ q.Spadj = -aoffset
+ q.Pos = p.Pos
+ q.Link = p.Link
+ p.Link = q
+ p = q
+ }
}
// If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',
@@ -765,11 +868,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Type = obj.TYPE_REG
p.From.Reg = REGLINK
} else {
- /* MOVD framesize-8(RSP), Rd */
+ /* MOVD (RSP), Rd */
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
- p.From.Offset = int64(c.autosize - 8)
}
}
if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
@@ -804,12 +906,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.From.Reg = int16(REG_LSL + r + (shift&7)<<5)
p.From.Offset = 0
}
- if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
- p.Spadj += int32(-p.To.Offset)
- }
- if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) {
- p.Spadj += int32(-p.From.Offset)
- }
}
}