aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/pprof/pprof.go
diff options
context:
space:
mode:
author: Nick Ripley <nick.ripley@datadoghq.com> 2024-07-02 12:53:36 -0400
committer: Gopher Robot <gobot@golang.org> 2024-07-09 21:41:42 +0000
commit: 183a40db6def2780478053b3cd14f3f5c355d999 (patch)
tree: 0ee9c062df560e185fb64f2ee802acf4048a9043 /src/runtime/pprof/pprof.go
parent: e89e880eacb1aceaa14733f44c38e6748d5ffa9c (diff)
download: go-183a40db6def2780478053b3cd14f3f5c355d999.tar.xz
runtime: avoid multiple records with identical stacks from MutexProfile
When using frame pointer unwinding, we defer frame skipping and inline expansion for call stacks until profile reporting time. We can end up with records which have different stacks if no frames are skipped, but identical stacks once skipping is taken into account. Returning multiple records with the same stack (but different values) has broken programs which rely on the records already being fully aggregated by call stack when returned from runtime.MutexProfile.

This CL addresses the problem by handling skipping at recording time. We do full inline expansion to correctly skip the desired number of frames when recording the call stack, and then handle the rest of inline expansion when reporting the profile.

The regression test in this CL is adapted from the reproducer in https://github.com/grafana/pyroscope-go/issues/103, authored by Tolya Korniltsev.

Fixes #67548

Co-Authored-By: Tolya Korniltsev <korniltsev.anatoly@gmail.com>
Change-Id: I6a42ce612377f235b2c8c0cec9ba8e9331224b84
Reviewed-on: https://go-review.googlesource.com/c/go/+/595966
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Carlos Amedee <carlos@golang.org>
Reviewed-by: Felix Geisendörfer <felix.geisendoerfer@datadoghq.com>
Diffstat (limited to 'src/runtime/pprof/pprof.go')
-rw-r--r--src/runtime/pprof/pprof.go23
1 files changed, 21 insertions, 2 deletions
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index d3af5bba91..4b7a9f63c6 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -404,6 +404,25 @@ type countProfile interface {
Label(i int) *labelMap
}
// expandInlinedFrames copies the call stack from pcs into dst, expanding any
// PCs corresponding to inlined calls into the corresponding PCs for the inlined
// functions. Returns the number of frames copied to dst.
//
// At most len(dst) frames are written; any deeper frames are dropped. If pcs
// yields no frames at all (for example because it is empty), nothing is
// written to dst and 0 is returned.
func expandInlinedFrames(dst, pcs []uintptr) int {
	cf := runtime.CallersFrames(pcs)
	var n int
	for n < len(dst) {
		f, more := cf.Next()
		// Frames.Next returns a zero Frame when it has nothing to
		// report. Recording it would emit a bogus PC of 1, so skip it.
		if f.PC != 0 {
			// f.PC is a "call PC", but later consumers will expect
			// "return PCs"
			dst[n] = f.PC + 1
			n++
		}
		if !more {
			break
		}
	}
	return n
}
+
// printCountCycleProfile outputs block profile records (for block or mutex profiles)
// as the pprof-proto format output. Translations from cycle count to time duration
// are done because the proto expects count and time (nanoseconds) instead of count
@@ -426,7 +445,7 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, records []
values[1] = int64(float64(r.Cycles) / cpuGHz)
// For count profiles, all stack addresses are
// return PCs, which is what appendLocsForStack expects.
- n := pprof_fpunwindExpand(expandedStack[:], r.Stack)
+ n := expandInlinedFrames(expandedStack, r.Stack)
locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
b.pbSample(values, locs, nil)
}
@@ -935,7 +954,7 @@ func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile fu
for i := range p {
r := &p[i]
fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
- n := pprof_fpunwindExpand(expandedStack, r.Stack)
+ n := expandInlinedFrames(expandedStack, r.Stack)
stack := expandedStack[:n]
for _, pc := range stack {
fmt.Fprintf(w, " %#x", pc)