about | summary | refs | log | tree | commit | diff
path: root/src/runtime
diff options
context:
space:
mode:
author: Michael Anthony Knyszek <mknyszek@google.com> 2024-02-26 19:51:31 +0000
committer: Gopher Robot <gobot@golang.org> 2024-04-05 20:42:48 +0000
commit: 5879bf7e38ac49e2e0caddd11cd4ddd4a4782437 (patch)
tree: f8a80eccea161b6e710bd68b2940db7e901ee39e /src/runtime
parent: 62791eb48912ef9dcaa26bee44c98891c8f60ddc (diff)
download: go-5879bf7e38ac49e2e0caddd11cd4ddd4a4782437.tar.xz
runtime: emit trace stacks for more goroutines in each generation
This change adds a new event, GoStatusStack, which is like GoStatus but also carries a stack ID. The purpose of this event is to emit stacks in more places, in particular for goroutines that may never emit a stack-bearing event in a whole generation. This CL targets one specific case: goroutines that were blocked or in a syscall the entire generation. This particular case is handled at the point that we scribble down the goroutine's status before the generation transition. That way, when we're finishing up the generation and emitting events for any goroutines we scribbled down, we have an accurate stack for those goroutines ready to go, and we emit a GoStatusStack instead of a GoStatus event. There's a small drawback with the way we scribble down the stack though: we immediately register it in the stack table instead of tracking the PCs. This means that if a goroutine does run and emit a trace event in between when we scribbled down its stack and the end of the generation, we will have recorded a stack that never actually gets referenced in the trace. This case should be rare. There are two remaining cases where we could emit stacks for goroutines but we don't. One is goroutines that get unblocked but either never run, or run and never block within a generation. We could take a stack trace at the point of unblocking the goroutine, if we're emitting a GoStatus event for it, but unfortunately we don't own the stack at that point. We could obtain ownership by grabbing its _Gscan bit, but that seems a little risky, since we could hold up the goroutine emitting the event for a while. Something to consider for the future. The other remaining case is a goroutine that was runnable when tracing started and began running, but then ran until the end of the generation without getting preempted or blocking. The main issue here is that although the goroutine will have a GoStatus event, it'll only have a GoStart event for it which doesn't emit a stack trace. 
This case is rare, but still certainly possible. I believe the only way to resolve it is to emit a GoStatusStack event instead of a GoStatus event for a goroutine that we're emitting GoStart for. This case is a bit easier than the last one because at the point of emitting GoStart, we have ownership of the goroutine's stack. We may consider dealing with these in the future, but for now, this CL captures a fairly large class of goroutines, so is worth it on its own. Fixes #65634. Change-Id: Ief3b6df5848b426e7ee6794e98dc7ef5f37ab2d0 Reviewed-on: https://go-review.googlesource.com/c/go/+/567076 Auto-Submit: Michael Knyszek <mknyszek@google.com> Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/trace2.go        |  4
-rw-r--r--  src/runtime/trace2event.go   |  7
-rw-r--r--  src/runtime/trace2runtime.go |  5
-rw-r--r--  src/runtime/trace2stack.go   | 46
-rw-r--r--  src/runtime/trace2status.go  |  8
5 files changed, 49 insertions, 21 deletions
diff --git a/src/runtime/trace2.go b/src/runtime/trace2.go
index a5ea9b4aa1..12647ca43b 100644
--- a/src/runtime/trace2.go
+++ b/src/runtime/trace2.go
@@ -323,6 +323,7 @@ func traceAdvance(stopTrace bool) {
gp *g
goid uint64
mid int64
+ stackID uint64
status uint32
waitreason waitReason
inMarkAssist bool
@@ -366,6 +367,7 @@ func traceAdvance(stopTrace bool) {
ug.status = readgstatus(s.g) &^ _Gscan
ug.waitreason = s.g.waitreason
ug.inMarkAssist = s.g.inMarkAssist
+ ug.stackID = traceStack(0, gp, gen)
}
resumeG(s)
casgstatus(me, _Gwaiting, _Grunning)
@@ -542,7 +544,7 @@ func traceAdvance(stopTrace bool) {
// traced in gen between when we recorded it and now. If that's true, the goid
// and status we recorded then is exactly what we want right now.
status := goStatusToTraceGoStatus(ug.status, ug.waitreason)
- statusWriter = statusWriter.writeGoStatus(ug.goid, ug.mid, status, ug.inMarkAssist)
+ statusWriter = statusWriter.writeGoStatus(ug.goid, ug.mid, status, ug.inMarkAssist, ug.stackID)
}
statusWriter.flush().end()
diff --git a/src/runtime/trace2event.go b/src/runtime/trace2event.go
index c568874821..26bb52f768 100644
--- a/src/runtime/trace2event.go
+++ b/src/runtime/trace2event.go
@@ -86,6 +86,9 @@ const (
traceEvGoSwitch // goroutine switch (coroswitch) [timestamp, goroutine ID, goroutine seq]
traceEvGoSwitchDestroy // goroutine switch and destroy [timestamp, goroutine ID, goroutine seq]
traceEvGoCreateBlocked // goroutine creation (starts blocked) [timestamp, new goroutine ID, new stack ID, stack ID]
+
+ // GoStatus with stack.
+ traceEvGoStatusStack // goroutine status at the start of a generation, with a stack [timestamp, goroutine ID, M ID, status, stack ID]
)
// traceArg is a simple wrapper type to help ensure that arguments passed
@@ -119,7 +122,7 @@ func (tl traceLocker) eventWriter(goStatus traceGoStatus, procStatus traceProcSt
w = w.writeProcStatus(uint64(pp.id), procStatus, pp.trace.inSweep)
}
if gp := tl.mp.curg; gp != nil && !gp.trace.statusWasTraced(tl.gen) && gp.trace.acquireStatus(tl.gen) {
- w = w.writeGoStatus(uint64(gp.goid), int64(tl.mp.procid), goStatus, gp.inMarkAssist)
+ w = w.writeGoStatus(uint64(gp.goid), int64(tl.mp.procid), goStatus, gp.inMarkAssist, 0 /* no stack */)
}
return traceEventWriter{w}
}
@@ -168,7 +171,7 @@ func (w traceWriter) event(ev traceEv, args ...traceArg) traceWriter {
// It then returns a traceArg representing that stack which may be
// passed to write.
func (tl traceLocker) stack(skip int) traceArg {
- return traceArg(traceStack(skip, tl.mp, tl.gen))
+ return traceArg(traceStack(skip, nil, tl.gen))
}
// startPC takes a start PC for a goroutine and produces a unique
diff --git a/src/runtime/trace2runtime.go b/src/runtime/trace2runtime.go
index 3e55226f28..6623879e6b 100644
--- a/src/runtime/trace2runtime.go
+++ b/src/runtime/trace2runtime.go
@@ -471,7 +471,10 @@ func (tl traceLocker) GoSwitch(nextg *g, destroy bool) {
// unblocked to the trace writer.
func emitUnblockStatus(w traceWriter, gp *g, gen uintptr) traceWriter {
if !gp.trace.statusWasTraced(gen) && gp.trace.acquireStatus(gen) {
- w = w.writeGoStatus(gp.goid, -1, traceGoWaiting, gp.inMarkAssist)
+ // TODO(go.dev/issue/65634): Although it would be nice to add a stack trace here of gp,
+ // we cannot safely do so. gp is in _Gwaiting and so we don't have ownership of its stack.
+ // We can fix this by acquiring the goroutine's scan bit.
+ w = w.writeGoStatus(gp.goid, -1, traceGoWaiting, gp.inMarkAssist, 0)
}
return w
}
diff --git a/src/runtime/trace2stack.go b/src/runtime/trace2stack.go
index af6638fa8f..44588fa39e 100644
--- a/src/runtime/trace2stack.go
+++ b/src/runtime/trace2stack.go
@@ -27,8 +27,9 @@ const (
logicalStackSentinel = ^uintptr(0)
)
-// traceStack captures a stack trace and registers it in the trace stack table.
-// It then returns its unique ID.
+// traceStack captures a stack trace from a goroutine and registers it in the trace
+// stack table. It then returns its unique ID. If gp == nil, then traceStack will
+// attempt to use the current execution context.
//
// skip controls the number of leaf frames to omit in order to hide tracer internals
// from stack traces, see CL 5523.
@@ -36,13 +37,22 @@ const (
// Avoid calling this function directly. gen needs to be the current generation
// that this stack trace is being written out for, which needs to be synchronized with
// generations moving forward. Prefer traceEventWriter.stack.
-func traceStack(skip int, mp *m, gen uintptr) uint64 {
+func traceStack(skip int, gp *g, gen uintptr) uint64 {
var pcBuf [traceStackSize]uintptr
- gp := getg()
- curgp := gp.m.curg
+ // Figure out gp and mp for the backtrace.
+ var mp *m
+ if gp == nil {
+ mp = getg().m
+ gp = mp.curg
+ }
+ if gp != nil && mp == nil {
+ // We're getting the backtrace for a G that's not currently executing.
+ // It may still have an M, if it's locked to some M.
+ mp = gp.lockedm.ptr()
+ }
nstk := 1
- if tracefpunwindoff() || mp.hasCgoOnStack() {
+ if tracefpunwindoff() || (mp != nil && mp.hasCgoOnStack()) {
// Slow path: Unwind using default unwinder. Used when frame pointer
// unwinding is unavailable or disabled (tracefpunwindoff), or might
// produce incomplete results or crashes (hasCgoOnStack). Note that no
@@ -50,30 +60,36 @@ func traceStack(skip int, mp *m, gen uintptr) uint64 {
// motivation is to take advantage of a potentially registered cgo
// symbolizer.
pcBuf[0] = logicalStackSentinel
- if curgp == gp {
+ if getg() == gp {
nstk += callers(skip+1, pcBuf[1:])
- } else if curgp != nil {
- nstk += gcallers(curgp, skip, pcBuf[1:])
+ } else if gp != nil {
+ nstk += gcallers(gp, skip, pcBuf[1:])
}
} else {
// Fast path: Unwind using frame pointers.
pcBuf[0] = uintptr(skip)
- if curgp == gp {
+ if getg() == gp {
nstk += fpTracebackPCs(unsafe.Pointer(getfp()), pcBuf[1:])
- } else if curgp != nil {
- // We're called on the g0 stack through mcall(fn) or systemstack(fn). To
+ } else if gp != nil {
+ // Two cases:
+ //
+ // (1) We're called on the g0 stack through mcall(fn) or systemstack(fn). To
// behave like gcallers above, we start unwinding from sched.bp, which
// points to the caller frame of the leaf frame on g's stack. The return
// address of the leaf frame is stored in sched.pc, which we manually
// capture here.
- pcBuf[1] = curgp.sched.pc
- nstk += 1 + fpTracebackPCs(unsafe.Pointer(curgp.sched.bp), pcBuf[2:])
+ //
+ // (2) We're called against a gp that we're not currently executing on, in
+ // which case it's currently not executing. gp.sched contains the most up-to-date
+ // information about where it stopped, and like case (1), we match gcallers here.
+ pcBuf[1] = gp.sched.pc
+ nstk += 1 + fpTracebackPCs(unsafe.Pointer(gp.sched.bp), pcBuf[2:])
}
}
if nstk > 0 {
nstk-- // skip runtime.goexit
}
- if nstk > 0 && curgp.goid == 1 {
+ if nstk > 0 && gp.goid == 1 {
nstk-- // skip runtime.main
}
id := trace.stackTab[gen%2].put(pcBuf[:nstk])
diff --git a/src/runtime/trace2status.go b/src/runtime/trace2status.go
index 34f2e4c06f..48ecb363a6 100644
--- a/src/runtime/trace2status.go
+++ b/src/runtime/trace2status.go
@@ -48,7 +48,7 @@ const (
)
// writeGoStatus emits a GoStatus event as well as any active ranges on the goroutine.
-func (w traceWriter) writeGoStatus(goid uint64, mid int64, status traceGoStatus, markAssist bool) traceWriter {
+func (w traceWriter) writeGoStatus(goid uint64, mid int64, status traceGoStatus, markAssist bool, stackID uint64) traceWriter {
// The status should never be bad. Some invariant must have been violated.
if status == traceGoBad {
print("runtime: goid=", goid, "\n")
@@ -56,7 +56,11 @@ func (w traceWriter) writeGoStatus(goid uint64, mid int64, status traceGoStatus,
}
// Trace the status.
- w = w.event(traceEvGoStatus, traceArg(goid), traceArg(uint64(mid)), traceArg(status))
+ if stackID == 0 {
+ w = w.event(traceEvGoStatus, traceArg(goid), traceArg(uint64(mid)), traceArg(status))
+ } else {
+ w = w.event(traceEvGoStatusStack, traceArg(goid), traceArg(uint64(mid)), traceArg(status), traceArg(stackID))
+ }
// Trace any special ranges that are in-progress.
if markAssist {