aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2023-08-28 14:57:29 -0400
committerCherry Mui <cherryyz@google.com>2023-10-26 18:46:50 +0000
commit0262ea1ff9ac3b9fd268a48fcaaa6811c20cbea2 (patch)
tree0b5ee8d3f2ce8c742b7fad74cdb70f17abed2c80 /src
parent29b80397a8f385ead0a9b3c11060a571438ef026 (diff)
downloadgo-0262ea1ff9ac3b9fd268a48fcaaa6811c20cbea2.tar.xz
runtime: print a stack trace at "morestack on g0"
Error like "morestack on g0" is one of the errors that is very hard to debug, because often it doesn't print a useful stack trace. The runtime doesn't directly print a stack trace because it is a bad stack state to call print. Sometimes the SIGABRT may trigger a traceback, but sometimes not especially in a cgo binary. Even if it triggers a traceback it often does not include the stack trace of the bad stack. This CL makes it explicitly print a stack trace and throw. The idea is to have some space as an "emergency" crash stack. When the stack is in a really bad state, we switch to the crash stack and do a traceback. Currently only implemented on AMD64 and ARM64. TODO: also handle errors like "morestack on gsignal" and bad systemstack. Also handle other architectures. Change-Id: Ibfc397202f2bb0737c5cbe99f2763de83301c1c1 Reviewed-on: https://go-review.googlesource.com/c/go/+/419435 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/runtime/asm.s8
-rw-r--r--src/runtime/asm_amd64.s55
-rw-r--r--src/runtime/asm_arm64.s43
-rw-r--r--src/runtime/crash_test.go8
-rw-r--r--src/runtime/export_test.go2
-rw-r--r--src/runtime/proc.go58
6 files changed, 147 insertions, 27 deletions
diff --git a/src/runtime/asm.s b/src/runtime/asm.s
index f7bc5d432e..84e561fb43 100644
--- a/src/runtime/asm.s
+++ b/src/runtime/asm.s
@@ -12,3 +12,11 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0
// See map.go comment on the need for this routine.
TEXT ·mapinitnoop<ABIInternal>(SB),NOSPLIT,$0-0
RET
+
+#ifndef GOARCH_amd64
+#ifndef GOARCH_arm64
+// stub to appease shared build mode.
+TEXT ·switchToCrashStack0<ABIInternal>(SB),NOSPLIT,$0-0
+ UNDEF
+#endif
+#endif
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index ccc2bd21fe..ab845fbd8a 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -537,6 +537,30 @@ bad:
CALL AX
INT $3
+// func switchToCrashStack0(fn func())
+TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
+ MOVQ g_m(R14), BX // curm
+
+ // set g to gcrash
+ LEAQ runtime·gcrash(SB), R14 // g = &gcrash
+ MOVQ BX, g_m(R14) // g.m = curm
+ MOVQ R14, m_g0(BX) // curm.g0 = g
+ get_tls(CX)
+ MOVQ R14, g(CX)
+
+ // switch to crashstack
+ MOVQ (g_stack+stack_hi)(R14), BX
+ SUBQ $(4*8), BX
+ MOVQ BX, SP
+
+ // call target function
+ MOVQ AX, DX
+ MOVQ 0(AX), AX
+ CALL AX
+
+ // should never return
+ CALL runtime·abort(SB)
+ UNDEF
/*
* support for morestack
@@ -551,17 +575,26 @@ bad:
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
// Cannot grow scheduler stack (m->g0).
get_tls(CX)
- MOVQ g(CX), BX
- MOVQ g_m(BX), BX
- MOVQ m_g0(BX), SI
- CMPQ g(CX), SI
+ MOVQ g(CX), DI // DI = g
+ MOVQ g_m(DI), BX // BX = m
+
+ // Set g->sched to context in f.
+ MOVQ 0(SP), AX // f's PC
+ MOVQ AX, (g_sched+gobuf_pc)(DI)
+ LEAQ 8(SP), AX // f's SP
+ MOVQ AX, (g_sched+gobuf_sp)(DI)
+ MOVQ BP, (g_sched+gobuf_bp)(DI)
+ MOVQ DX, (g_sched+gobuf_ctxt)(DI)
+
+ MOVQ m_g0(BX), SI // SI = m.g0
+ CMPQ DI, SI
JNE 3(PC)
CALL runtime·badmorestackg0(SB)
CALL runtime·abort(SB)
// Cannot grow signal stack (m->gsignal).
MOVQ m_gsignal(BX), SI
- CMPQ g(CX), SI
+ CMPQ DI, SI
JNE 3(PC)
CALL runtime·badmorestackgsignal(SB)
CALL runtime·abort(SB)
@@ -573,17 +606,7 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
MOVQ AX, (m_morebuf+gobuf_pc)(BX)
LEAQ 16(SP), AX // f's caller's SP
MOVQ AX, (m_morebuf+gobuf_sp)(BX)
- get_tls(CX)
- MOVQ g(CX), SI
- MOVQ SI, (m_morebuf+gobuf_g)(BX)
-
- // Set g->sched to context in f.
- MOVQ 0(SP), AX // f's PC
- MOVQ AX, (g_sched+gobuf_pc)(SI)
- LEAQ 8(SP), AX // f's SP
- MOVQ AX, (g_sched+gobuf_sp)(SI)
- MOVQ BP, (g_sched+gobuf_bp)(SI)
- MOVQ DX, (g_sched+gobuf_ctxt)(SI)
+ MOVQ DI, (m_morebuf+gobuf_g)(BX)
// Call newstack on m->g0's stack.
MOVQ m_g0(BX), BX
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 7866e35e4f..6d77b08a1b 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -262,6 +262,30 @@ noswitch:
SUB $8, RSP, R29 // restore FP
B (R3)
+// func switchToCrashStack0(fn func())
+TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
+ MOVD R0, R26 // context register
+ MOVD g_m(g), R1 // curm
+
+ // set g to gcrash
+ MOVD $runtime·gcrash(SB), g // g = &gcrash
+ BL runtime·save_g(SB) // clobbers R0
+ MOVD R1, g_m(g) // g.m = curm
+ MOVD g, m_g0(R1) // curm.g0 = g
+
+ // switch to crashstack
+ MOVD (g_stack+stack_hi)(g), R1
+ SUB $(4*8), R1
+ MOVD R1, RSP
+
+ // call target function
+ MOVD 0(R26), R0
+ CALL (R0)
+
+ // should never return
+ CALL runtime·abort(SB)
+ UNDEF
+
/*
* support for morestack
*/
@@ -278,6 +302,16 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
// Cannot grow scheduler stack (m->g0).
MOVD g_m(g), R8
MOVD m_g0(R8), R4
+
+ // Called from f.
+ // Set g->sched to context in f
+ MOVD RSP, R0
+ MOVD R0, (g_sched+gobuf_sp)(g)
+ MOVD R29, (g_sched+gobuf_bp)(g)
+ MOVD LR, (g_sched+gobuf_pc)(g)
+ MOVD R3, (g_sched+gobuf_lr)(g)
+ MOVD R26, (g_sched+gobuf_ctxt)(g)
+
CMP g, R4
BNE 3(PC)
BL runtime·badmorestackg0(SB)
@@ -291,15 +325,6 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
B runtime·abort(SB)
// Called from f.
- // Set g->sched to context in f
- MOVD RSP, R0
- MOVD R0, (g_sched+gobuf_sp)(g)
- MOVD R29, (g_sched+gobuf_bp)(g)
- MOVD LR, (g_sched+gobuf_pc)(g)
- MOVD R3, (g_sched+gobuf_lr)(g)
- MOVD R26, (g_sched+gobuf_ctxt)(g)
-
- // Called from f.
// Set m->morebuf to f's callers.
MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
MOVD RSP, R0
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 8dd95a44af..7a3b0388d7 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -804,6 +804,14 @@ func TestG0StackOverflow(t *testing.T) {
if n := strings.Count(string(out), "morestack on g0\n"); n != 1 {
t.Fatalf("%s\n(exit status %v)", out, err)
}
+ if runtime.GOARCH == "amd64" || runtime.GOARCH == "arm64" {
+ // check for a stack trace
+ want := "runtime.stackOverflow"
+ if n := strings.Count(string(out), want); n < 5 {
+ t.Errorf("output does not contain %q at least 5 times:\n%s", want, out)
+ }
+ return // it's not a signal-style traceback
+ }
// Check that it's a signal-style traceback.
if runtime.GOOS != "windows" {
if want := "PC="; !strings.Contains(string(out), want) {
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 6d1d3c4537..922794edd6 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -694,7 +694,7 @@ func G0StackOverflow() {
// The stack bounds for g0 stack is not always precise.
// Use an artificially small stack, to trigger a stack overflow
// without actually run out of the system stack (which may seg fault).
- g0.stack.lo = sp - 4096
+ g0.stack.lo = sp - 4096 - stackSystem
g0.stackguard0 = g0.stack.lo + stackGuard
g0.stackguard1 = g0.stackguard0
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index d560d3970e..408f26cf7a 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -516,7 +516,20 @@ func badreflectcall() {
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
- writeErrStr("fatal: morestack on g0\n")
+ if !crashStackImplemented {
+ writeErrStr("fatal: morestack on g0\n")
+ return
+ }
+
+ g := getg()
+ switchToCrashStack(func() {
+ print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
+ g.m.traceback = 2 // include pc and sp in stack trace
+ traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
+ print("\n")
+
+ throw("morestack on g0")
+ })
}
//go:nosplit
@@ -530,6 +543,49 @@ func badctxt() {
throw("ctxt != 0")
}
+// crashstack is a space that can be used as the stack when it is
+// crashing on bad stack conditions, e.g. morestack on g0.
+// gcrash is the corresponding (fake) g.
+var crashstack [16384]byte
+
+var gcrash = g{
+ stack: stack{uintptr(unsafe.Pointer(&crashstack)), uintptr(unsafe.Pointer(&crashstack)) + unsafe.Sizeof(crashstack)},
+ stackguard0: uintptr(unsafe.Pointer(&crashstack)) + 1000,
+ stackguard1: uintptr(unsafe.Pointer(&crashstack)) + 1000,
+}
+
+var crashingG atomic.Pointer[g]
+
+// Switch to crashstack and call fn, with special handling of
+// concurrent and recursive cases.
+//
+// Nosplit as it is called in a bad stack condition (we know
+// morestack would fail).
+//
+//go:nosplit
+//go:nowritebarrierrec
+func switchToCrashStack(fn func()) {
+ me := getg()
+ if crashingG.CompareAndSwapNoWB(nil, me) {
+ switchToCrashStack0(fn) // should never return
+ abort()
+ }
+ if crashingG.Load() == me {
+ // recursive crashing. too bad.
+ writeErrStr("fatal: recursive switchToCrashStack\n")
+ abort()
+ }
+ // Another g is crashing. Give it some time, hopefully it will finish traceback.
+ usleep_no_g(100)
+ writeErrStr("fatal: concurrent switchToCrashStack\n")
+ abort()
+}
+
+const crashStackImplemented = GOARCH == "amd64" || GOARCH == "arm64"
+
+//go:noescape
+func switchToCrashStack0(func()) // in assembly
+
func lockedOSThread() bool {
gp := getg()
return gp.lockedm != 0 && gp.m.lockedg != 0