aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
author Keith Randall <khr@golang.org> 2025-05-17 15:05:56 -0700
committer Keith Randall <khr@golang.org> 2025-10-06 14:11:41 -0700
commit 719dfcf8a8478d70360bf3c34c0e920be7b32994 (patch)
tree d58aaf3289de3bb18901e34b336da46b425f8075 /src/runtime
parent f3312124c2370c2f64a7f9ad29732ec30209647a (diff)
download go-719dfcf8a8478d70360bf3c34c0e920be7b32994.tar.xz
cmd/compile: redo arm64 LR/FP save and restore
Instead of storing LR (the return address) at 0(SP) and the FP (parent's frame pointer) at -8(SP), store them at framesize-8(SP) and framesize-16(SP), respectively.

We push and pop data onto the stack such that we're never accessing anything below SP.

The prolog/epilog lengths are unchanged (3 insns for a typical prolog, 2 for a typical epilog).

We use 8 bytes more per frame.

Typical prologue:

    STP.W (FP, LR), -16(SP)
    MOVD SP, FP
    SUB $C, SP

Typical epilogue:

    ADD $C, SP
    LDP.P 16(SP), (FP, LR)
    RET

The previous word where we stored LR, at 0(SP), is now unused. We could repurpose that slot for storing a local variable.

The new prolog and epilog instructions are recognized by libunwind, so pc-sampling tools like perf should now be accurate. (TODO: except maybe after the first RET instruction? Have to look into that.)

Update #73753 (fixes, for arm64)
Update #57302 (Quim thinks this will help on that issue)

Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/674615
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/asm_arm64.s 81
-rw-r--r-- src/runtime/mkpreempt.go 20
-rw-r--r-- src/runtime/panic.go 8
-rw-r--r-- src/runtime/preempt_arm64.s 15
-rw-r--r-- src/runtime/race_arm64.s 17
-rw-r--r-- src/runtime/signal_arm64.go 16
-rw-r--r-- src/runtime/stack.go 20
-rw-r--r-- src/runtime/testdata/testprog/badtraceback.go 5
-rw-r--r-- src/runtime/traceback.go 30
9 files changed, 118 insertions, 94 deletions
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index a0e82ec830..aa49a27a75 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -50,9 +50,7 @@ TEXT _rt0_arm64_lib(SB),NOSPLIT,$184
CBZ R4, nocgo
MOVD $_rt0_arm64_lib_go(SB), R0
MOVD $0, R1
- SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
BL (R4)
- ADD $16, RSP
B restore
nocgo:
@@ -371,7 +369,6 @@ switch:
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R0
MOVD R0, RSP
- MOVD (g_sched+gobuf_bp)(g), R29
MOVD $0, (g_sched+gobuf_sp)(g)
MOVD $0, (g_sched+gobuf_bp)(g)
RET
@@ -381,8 +378,8 @@ noswitch:
// Using a tail call here cleans up tracebacks since we won't stop
// at an intermediate systemstack.
MOVD 0(R26), R3 // code pointer
- MOVD.P 16(RSP), R30 // restore LR
- SUB $8, RSP, R29 // restore FP
+ ADD $16, RSP
+ LDP.P 16(RSP), (R29,R30) // restore FP, LR
B (R3)
// func switchToCrashStack0(fn func())
@@ -1051,7 +1048,7 @@ again:
// Smashes R0.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
MOVD $runtime·systemstack_switch(SB), R0
- ADD $8, R0 // get past prologue
+ ADD $12, R0 // get past prologue
MOVD R0, (g_sched+gobuf_pc)(g)
MOVD RSP, R0
MOVD R0, (g_sched+gobuf_sp)(g)
@@ -1069,9 +1066,7 @@ TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
MOVD fn+0(FP), R1
MOVD arg+8(FP), R0
- SUB $16, RSP // skip over saved frame pointer below RSP
BL (R1)
- ADD $16, RSP // skip over saved frame pointer below RSP
RET
// func asmcgocall(fn, arg unsafe.Pointer) int32
@@ -1236,9 +1231,9 @@ havem:
BL runtime·save_g(SB)
MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
MOVD (g_sched+gobuf_pc)(g), R5
- MOVD R5, -48(R4)
+ MOVD R5, -8(R4)
MOVD (g_sched+gobuf_bp)(g), R5
- MOVD R5, -56(R4)
+ MOVD R5, -16(R4)
// Gather our arguments into registers.
MOVD fn+0(FP), R1
MOVD frame+8(FP), R2
@@ -1252,7 +1247,7 @@ havem:
CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
// Restore g->sched (== m->curg->sched) from saved values.
- MOVD 0(RSP), R5
+ MOVD 40(RSP), R5
MOVD R5, (g_sched+gobuf_pc)(g)
MOVD RSP, R4
ADD $48, R4, R4
@@ -1490,10 +1485,57 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
+//
+// State before debugger starts doing anything:
+// | current |
+// | stack |
+// +-------------+ <- SP = origSP
+// stopped executing at PC = origPC
+// some values are in LR (origLR) and FP (origFP)
+//
+// After debugger has done steps 1-6 above:
+// | current |
+// | stack |
+// +-------------+ <- origSP
+// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.)
+// +-------------+
+// | origLR |
+// +-------------+ <- SP
+// | ----- |
+// +-------------+
+// | argsize |
+// +-------------+
+// LR = origPC, PC = debugCallV2
+//
+// debugCallV2 then modifies the stack up to the "good" label:
+// | current |
+// | stack |
+// +-------------+ <- origSP
+// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.)
+// +-------------+
+// | origLR |
+// +-------------+ <- where debugger left SP
+// | origPC |
+// +-------------+
+// | origFP |
+// +-------------+ <- FP = SP + 256
+// | saved |
+// | registers |
+// | (224 bytes) |
+// +-------------+ <- SP + 32
+// | space for |
+// | outargs |
+// +-------------+ <- SP + 8
+// | argsize |
+// +-------------+ <- SP
+
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
- STP (R29, R30), -280(RSP)
- SUB $272, RSP, RSP
- SUB $8, RSP, R29
+ MOVD R30, -8(RSP) // save origPC
+ MOVD -16(RSP), R30 // save argsize in R30 temporarily
+ MOVD.W R29, -16(RSP) // push origFP
+ MOVD RSP, R29 // frame pointer chain now set up
+ SUB $256, RSP, RSP // allocate frame
+ MOVD R30, (RSP) // Save argsize on the stack
// Save all registers that may contain pointers so they can be
// conservatively scanned.
//
@@ -1515,7 +1557,8 @@ TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
STP (R0, R1), (4*8)(RSP)
// Perform a safe-point check.
- MOVD R30, 8(RSP) // Caller's PC
+ MOVD 264(RSP), R0 // origPC
+ MOVD R0, 8(RSP)
CALL runtime·debugCallCheck(SB)
MOVD 16(RSP), R0
CBZ R0, good
@@ -1559,7 +1602,7 @@ good:
CALL runtime·debugCallWrap(SB); \
JMP restore
- MOVD 256(RSP), R0 // the argument frame size
+ MOVD (RSP), R0 // the argument frame size
DEBUG_CALL_DISPATCH(debugCall32<>, 32)
DEBUG_CALL_DISPATCH(debugCall64<>, 64)
DEBUG_CALL_DISPATCH(debugCall128<>, 128)
@@ -1607,9 +1650,9 @@ restore:
LDP (6*8)(RSP), (R2, R3)
LDP (4*8)(RSP), (R0, R1)
- LDP -8(RSP), (R29, R27)
- ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
- MOVD -16(RSP), R30 // restore old lr
+ MOVD 272(RSP), R30 // restore old lr (saved by (*sigctxt).pushCall)
+ LDP 256(RSP), (R29, R27) // restore old fp, set up resumption address
+ ADD $288, RSP, RSP // Pop frame, LR+FP, and block pushed by (*sigctxt).pushCall
JMP (R27)
// runtime.debugCallCheck assumes that functions defined with the
diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go
index 769c4ffc5c..9064cae039 100644
--- a/src/runtime/mkpreempt.go
+++ b/src/runtime/mkpreempt.go
@@ -488,26 +488,18 @@ func genARM64(g *gen) {
l.stack += 8 // SP needs 16-byte alignment
}
- // allocate frame, save PC of interrupted instruction (in LR)
- p("MOVD R30, %d(RSP)", -l.stack)
+ // allocate frame, save PC (in R30), FP (in R29) of interrupted instruction
+ p("STP.W (R29, R30), -16(RSP)")
+ p("MOVD RSP, R29") // set up new frame pointer
p("SUB $%d, RSP", l.stack)
- p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux)
- p("SUB $8, RSP, R29") // set up new frame pointer
- // On iOS, save the LR again after decrementing SP. We run the
- // signal handler on the G stack (as it doesn't support sigaltstack),
- // so any writes below SP may be clobbered.
- p("#ifdef GOOS_ios")
- p("MOVD R30, (RSP)")
- p("#endif")
l.save(g)
p("CALL ·asyncPreempt2(SB)")
l.restore(g)
- p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
- p("MOVD -8(RSP), R29") // restore frame pointer
- p("MOVD (RSP), R27") // load PC to REGTMP
- p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall)
+ p("MOVD %d(RSP), R30", l.stack+16) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
+ p("LDP %d(RSP), (R29, R27)", l.stack) // Restore frame pointer. Load PC into regtmp.
+ p("ADD $%d, RSP", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall)
p("RET (R27)")
}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 8c91c9435a..04b3afe168 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -1379,10 +1379,10 @@ func recovery(gp *g) {
// the caller
gp.sched.bp = fp - 2*goarch.PtrSize
case goarch.IsArm64 != 0:
- // on arm64, the architectural bp points one word higher
- // than the sp. fp is totally useless to us here, because it
- // only gets us to the caller's fp.
- gp.sched.bp = sp - goarch.PtrSize
+ // on arm64, the first two words of the frame are caller's PC
+ // (the saved LR register) and the caller's BP.
+ // Coincidentally, the same as amd64.
+ gp.sched.bp = fp - 2*goarch.PtrSize
}
gogo(&gp.sched)
}
diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s
index 31ec9d940f..f4248cac25 100644
--- a/src/runtime/preempt_arm64.s
+++ b/src/runtime/preempt_arm64.s
@@ -4,13 +4,9 @@
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
- MOVD R30, -496(RSP)
+ STP.W (R29, R30), -16(RSP)
+ MOVD RSP, R29
SUB $496, RSP
- MOVD R29, -8(RSP)
- SUB $8, RSP, R29
- #ifdef GOOS_ios
- MOVD R30, (RSP)
- #endif
STP (R0, R1), 8(RSP)
STP (R2, R3), 24(RSP)
STP (R4, R5), 40(RSP)
@@ -78,8 +74,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
LDP 40(RSP), (R4, R5)
LDP 24(RSP), (R2, R3)
LDP 8(RSP), (R0, R1)
- MOVD 496(RSP), R30
- MOVD -8(RSP), R29
- MOVD (RSP), R27
- ADD $512, RSP
+ MOVD 512(RSP), R30
+ LDP 496(RSP), (R29, R27)
+ ADD $528, RSP
RET (R27)
diff --git a/src/runtime/race_arm64.s b/src/runtime/race_arm64.s
index 5df650105b..feaa328d4c 100644
--- a/src/runtime/race_arm64.s
+++ b/src/runtime/race_arm64.s
@@ -397,7 +397,7 @@ TEXT racecallatomic<>(SB), NOSPLIT, $0
// R3 = addr of incoming arg list
// Trigger SIGSEGV early.
- MOVD 40(RSP), R3 // 1st arg is addr. after two times BL, get it at 40(RSP)
+ MOVD 72(RSP), R3 // 1st arg is addr. after two small frames (32 bytes each), get it at 72(RSP)
MOVB (R3), R13 // segv here if addr is bad
// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
MOVD runtime·racearenastart(SB), R10
@@ -417,10 +417,11 @@ racecallatomic_ok:
// Addr is within the good range, call the atomic function.
load_g
MOVD g_racectx(g), R0 // goroutine context
- MOVD 16(RSP), R1 // caller pc
+ MOVD 56(RSP), R1 // caller pc
MOVD R9, R2 // pc
- ADD $40, RSP, R3
- JMP racecall<>(SB) // does not return
+ ADD $72, RSP, R3
+ BL racecall<>(SB)
+ RET
racecallatomic_ignore:
// Addr is outside the good range.
// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
@@ -435,9 +436,9 @@ racecallatomic_ignore:
// racecall will call LLVM race code which might clobber R28 (g)
load_g
MOVD g_racectx(g), R0 // goroutine context
- MOVD 16(RSP), R1 // caller pc
+ MOVD 56(RSP), R1 // caller pc
MOVD R9, R2 // pc
- ADD $40, RSP, R3 // arguments
+ ADD $72, RSP, R3 // arguments
BL racecall<>(SB)
// Call __tsan_go_ignore_sync_end.
MOVD $__tsan_go_ignore_sync_end(SB), R9
@@ -476,10 +477,6 @@ TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0
MOVD (g_sched+gobuf_sp)(R11), R12
MOVD R12, RSP
call:
- // Decrement SP past where the frame pointer is saved in the Go arm64
- // ABI (one word below the stack pointer) so the race detector library
- // code doesn't clobber it
- SUB $16, RSP
BL R9
MOVD R19, RSP
JMP (R20)
diff --git a/src/runtime/signal_arm64.go b/src/runtime/signal_arm64.go
index af7d29f9de..61dad50721 100644
--- a/src/runtime/signal_arm64.go
+++ b/src/runtime/signal_arm64.go
@@ -8,7 +8,6 @@ package runtime
import (
"internal/abi"
- "internal/goarch"
"internal/runtime/sys"
"unsafe"
)
@@ -63,18 +62,11 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) {
// We arrange lr, and pc to pretend the panicking
// function calls sigpanic directly.
// Always save LR to stack so that panics in leaf
- // functions are correctly handled. This smashes
- // the stack frame but we're not going back there
- // anyway.
+ // functions are correctly handled.
+ // This extra space is known to gentraceback.
sp := c.sp() - sys.StackAlign // needs only sizeof uint64, but must align the stack
c.set_sp(sp)
*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
- // Make sure a valid frame pointer is saved on the stack so that the
- // frame pointer checks in adjustframe are happy, if they're enabled.
- // Frame pointer unwinding won't visit the sigpanic frame, since
- // sigpanic will save the same frame pointer before calling into a panic
- // function.
- *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
pc := gp.sigpc
@@ -96,10 +88,6 @@ func (c *sigctxt) pushCall(targetPC, resumePC uintptr) {
sp := c.sp() - 16 // SP needs 16-byte alignment
c.set_sp(sp)
*(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr()
- // Make sure a valid frame pointer is saved on the stack so that the
- // frame pointer checks in adjustframe are happy, if they're enabled.
- // This is not actually used for unwinding.
- *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29()
// Set up PC and LR to pretend the function being signaled
// calls targetPC at resumePC.
c.set_lr(uint64(resumePC))
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 55e97e77af..5eaceec6da 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -579,23 +579,27 @@ var ptrnames = []string{
// | args to callee |
// +------------------+ <- frame->sp
//
-// (arm)
+// (arm64)
// +------------------+
// | args from caller |
// +------------------+ <- frame->argp
-// | caller's retaddr |
+// | <unused> |
+// +------------------+ <- frame->fp (aka caller's sp)
+// | return address |
// +------------------+
-// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp
+// | caller's FP | (frame pointer always enabled: TODO)
// +------------------+ <- frame->varp
// | locals |
// +------------------+
// | args to callee |
// +------------------+
-// | return address |
+// | <unused> |
// +------------------+ <- frame->sp
//
// varp > sp means that the function has a frame;
// varp == sp means frameless function.
+//
+// Alignment padding, if needed, will be between "locals" and "args to callee".
type adjustinfo struct {
old stack
@@ -709,7 +713,8 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
}
// Adjust saved frame pointer if there is one.
- if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize {
+ if goarch.ArchFamily == goarch.AMD64 && frame.argp-frame.varp == 2*goarch.PtrSize ||
+ goarch.ArchFamily == goarch.ARM64 && frame.argp-frame.varp == 3*goarch.PtrSize {
if stackDebug >= 3 {
print(" saved bp\n")
}
@@ -723,10 +728,7 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) {
throw("bad frame pointer")
}
}
- // On AMD64, this is the caller's frame pointer saved in the current
- // frame.
- // On ARM64, this is the frame pointer of the caller's caller saved
- // by the caller in its frame (one word below its SP).
+ // This is the caller's frame pointer saved in the current frame.
adjustpointer(adjinfo, unsafe.Pointer(frame.varp))
}
diff --git a/src/runtime/testdata/testprog/badtraceback.go b/src/runtime/testdata/testprog/badtraceback.go
index 455118a543..36575f765d 100644
--- a/src/runtime/testdata/testprog/badtraceback.go
+++ b/src/runtime/testdata/testprog/badtraceback.go
@@ -41,6 +41,11 @@ func badLR2(arg int) {
if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
lrOff = 32 // FIXED_FRAME or sys.MinFrameSize
}
+ if runtime.GOARCH == "arm64" {
+ // skip 8 bytes at bottom of parent frame, then point
+ // to the 8 bytes of the saved PC at the top of the frame.
+ lrOff = 16
+ }
lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff))
*lrPtr = 0xbad
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 8882c306ed..1c3e679a02 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -175,6 +175,11 @@ func (u *unwinder) initAt(pc0, sp0, lr0 uintptr, gp *g, flags unwindFlags) {
// Start in the caller's frame.
if frame.pc == 0 {
if usesLR {
+ // TODO: this isn't right on arm64. But also, this should
+ // ~never happen. Calling a nil function will panic
+ // when loading the PC out of the closure, not when
+ // branching to that PC. (Closures should always have
+ // valid PCs in their first word.)
frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp))
frame.lr = 0
} else {
@@ -369,7 +374,11 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
var lrPtr uintptr
if usesLR {
if innermost && frame.sp < frame.fp || frame.lr == 0 {
- lrPtr = frame.sp
+ if GOARCH == "arm64" {
+ lrPtr = frame.fp - goarch.PtrSize
+ } else {
+ lrPtr = frame.sp
+ }
frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr))
}
} else {
@@ -385,24 +394,17 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) {
// On x86, call instruction pushes return PC before entering new function.
frame.varp -= goarch.PtrSize
}
+ if GOARCH == "arm64" && frame.varp > frame.sp {
+ frame.varp -= goarch.PtrSize // LR have been saved, skip over it.
+ }
// For architectures with frame pointers, if there's
// a frame, then there's a saved frame pointer here.
//
// NOTE: This code is not as general as it looks.
- // On x86, the ABI is to save the frame pointer word at the
+ // On x86 and arm64, the ABI is to save the frame pointer word at the
// top of the stack frame, so we have to back down over it.
- // On arm64, the frame pointer should be at the bottom of
- // the stack (with R29 (aka FP) = RSP), in which case we would
- // not want to do the subtraction here. But we started out without
- // any frame pointer, and when we wanted to add it, we didn't
- // want to break all the assembly doing direct writes to 8(RSP)
- // to set the first parameter to a called function.
- // So we decided to write the FP link *below* the stack pointer
- // (with R29 = RSP - 8 in Go functions).
- // This is technically ABI-compatible but not standard.
- // And it happens to end up mimicking the x86 layout.
- // Other architectures may make different decisions.
+ // No other architectures are framepointer-enabled at the moment.
if frame.varp > frame.sp && framepointer_enabled {
frame.varp -= goarch.PtrSize
}
@@ -562,7 +564,7 @@ func (u *unwinder) finishInternal() {
gp := u.g.ptr()
if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && u.frame.sp != gp.stktopsp {
print("runtime: g", gp.goid, ": frame.sp=", hex(u.frame.sp), " top=", hex(gp.stktopsp), "\n")
- print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "\n")
+ print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "]\n")
throw("traceback did not unwind completely")
}
}