From 719dfcf8a8478d70360bf3c34c0e920be7b32994 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Sat, 17 May 2025 15:05:56 -0700 Subject: cmd/compile: redo arm64 LR/FP save and restore Instead of storing LR (the return address) at 0(SP) and the FP (parent's frame pointer) at -8(SP), store them at framesize-8(SP) and framesize-16(SP), respectively. We push and pop data onto the stack such that we're never accessing anything below SP. The prolog/epilog lengths are unchanged (3 insns for a typical prolog, 2 for a typical epilog). We use 8 bytes more per frame. Typical prologue: STP.W (FP, LR), -16(SP) MOVD SP, FP SUB $C, SP Typical epilogue: ADD $C, SP LDP.P 16(SP), (FP, LR) RET The previous word where we stored LR, at 0(SP), is now unused. We could repurpose that slot for storing a local variable. The new prolog and epilog instructions are recognized by libunwind, so pc-sampling tools like perf should now be accurate. (TODO: except maybe after the first RET instruction? Have to look into that.) Update #73753 (fixes, for arm64) Update #57302 (Quim thinks this will help on that issue) Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8 Reviewed-on: https://go-review.googlesource.com/c/go/+/674615 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/runtime/stack.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'src/runtime/stack.go') diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 55e97e77af..5eaceec6da 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -579,23 +579,27 @@ var ptrnames = []string{ // | args to callee | // +------------------+ <- frame->sp // -// (arm) +// (arm64) // +------------------+ // | args from caller | // +------------------+ <- frame->argp -// | caller's retaddr | +// | | +// +------------------+ <- frame->fp (aka caller's sp) +// | return address | // +------------------+ -// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp +// | caller's FP | (frame pointer always enabled: TODO) // +------------------+ <- frame->varp // | locals | // +------------------+ // | args to callee | // +------------------+ -// | return address | +// | | // +------------------+ <- frame->sp // // varp > sp means that the function has a frame; // varp == sp means frameless function. +// +// Alignment padding, if needed, will be between "locals" and "args to callee". type adjustinfo struct { old stack @@ -709,7 +713,8 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) { } // Adjust saved frame pointer if there is one. - if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize { + if goarch.ArchFamily == goarch.AMD64 && frame.argp-frame.varp == 2*goarch.PtrSize || + goarch.ArchFamily == goarch.ARM64 && frame.argp-frame.varp == 3*goarch.PtrSize { if stackDebug >= 3 { print(" saved bp\n") } @@ -723,10 +728,7 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) { throw("bad frame pointer") } } - // On AMD64, this is the caller's frame pointer saved in the current - // frame. - // On ARM64, this is the frame pointer of the caller's caller saved - // by the caller in its frame (one word below its SP). + // This is the caller's frame pointer saved in the current frame. adjustpointer(adjinfo, unsafe.Pointer(frame.varp)) } -- cgit v1.3