aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/internal/obj/arm64/asm7.go
diff options
context:
space:
mode:
author: Keith Randall <khr@golang.org>, 2025-05-17 15:05:56 -0700
committer: Keith Randall <khr@golang.org>, 2025-10-06 14:11:41 -0700
commit: 719dfcf8a8478d70360bf3c34c0e920be7b32994 (patch)
tree: d58aaf3289de3bb18901e34b336da46b425f8075 /src/cmd/internal/obj/arm64/asm7.go
parent: f3312124c2370c2f64a7f9ad29732ec30209647a (diff)
download: go-719dfcf8a8478d70360bf3c34c0e920be7b32994.tar.xz
cmd/compile: redo arm64 LR/FP save and restore
Instead of storing LR (the return address) at 0(SP) and the FP (parent's frame pointer) at -8(SP), store them at framesize-8(SP) and framesize-16(SP), respectively. We push and pop data onto the stack such that we're never accessing anything below SP.

The prolog/epilog lengths are unchanged (3 insns for a typical prolog, 2 for a typical epilog). We use 8 bytes more per frame.

Typical prologue:
    STP.W (FP, LR), -16(SP)
    MOVD SP, FP
    SUB $C, SP

Typical epilogue:
    ADD $C, SP
    LDP.P 16(SP), (FP, LR)
    RET

The previous word where we stored LR, at 0(SP), is now unused. We could repurpose that slot for storing a local variable.

The new prolog and epilog instructions are recognized by libunwind, so pc-sampling tools like perf should now be accurate. (TODO: except maybe after the first RET instruction? Have to look into that.)

Update #73753 (fixes, for arm64)
Update #57302 (Quim thinks this will help on that issue)

Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/674615
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Diffstat (limited to 'src/cmd/internal/obj/arm64/asm7.go')
-rw-r--r-- src/cmd/internal/obj/arm64/asm7.go 12
1 files changed, 5 insertions, 7 deletions
diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 743d09a319..281d705a3e 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -51,7 +51,6 @@ type ctxt7 struct {
blitrl *obj.Prog
elitrl *obj.Prog
autosize int32
- extrasize int32
instoffset int64
pc int64
pool struct {
@@ -1122,8 +1121,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
}
- c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
- p.To.Offset &= 0xffffffff // extrasize is no longer needed
+ c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)}
// Process literal pool and allocate initial program counter for each Prog, before
// generating branch veneers.
@@ -2119,8 +2117,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
- // The frame top 8 or 16 bytes are for FP
- c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
+ // The frame top 16 bytes are for LR/FP
+ c.instoffset = int64(c.autosize) + a.Offset - extrasize
return autoclass(c.instoffset)
case obj.NAME_PARAM:
@@ -2180,8 +2178,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
// a.Offset is still relative to pseudo-SP.
a.Reg = obj.REG_NONE
}
- // The frame top 8 or 16 bytes are for FP
- c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
+ // The frame top 16 bytes are for LR/FP
+ c.instoffset = int64(c.autosize) + a.Offset - extrasize
case obj.NAME_PARAM:
if a.Reg == REGSP {