aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/pprof
diff options
context:
space:
mode:
authorAustin Clements <austin@google.com>2017-04-20 15:54:12 -0400
committerAustin Clements <austin@google.com>2017-05-15 18:20:23 +0000
commit9e83c11fca44282e4b8b1e099f437f557b73b43e (patch)
tree49d03dd419bd2be568155423bd54731bc141f41d /src/runtime/pprof
parent1dc0f9696b743b10bf0b7b49780bee7f4756499b (diff)
downloadgo-9e83c11fca44282e4b8b1e099f437f557b73b43e.tar.xz
runtime/pprof: expand inlined frames in symbolized proto profiles
Currently proto symbolization uses runtime.FuncForPC and assumes each PC maps to a single frame. This isn't true in the presence of inlining (even with leaf-only inlining this can get incorrect results). Change PC symbolization to use runtime.CallersFrames to expand each PC to all of the frames at that PC. Change-Id: I8d20dff7495a5de495ae07f569122c225d433ced Reviewed-on: https://go-review.googlesource.com/41256 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Matloob <matloob@golang.org>
Diffstat (limited to 'src/runtime/pprof')
-rw-r--r--src/runtime/pprof/pprof.go4
-rw-r--r--src/runtime/pprof/pprof_test.go27
-rw-r--r--src/runtime/pprof/proto.go98
-rw-r--r--src/runtime/pprof/protomem.go5
4 files changed, 91 insertions, 43 deletions
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 74092d2565..b6253b1aa5 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -400,9 +400,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
locs = locs[:0]
for _, addr := range p.Stack(index[k]) {
// For count profiles, all stack addresses are
- // return PCs. Adjust them to be call PCs for
- // locForPC.
- addr--
+ // return PCs, which is what locForPC expects.
locs = append(locs, b.locForPC(addr))
}
b.pbSample(values, locs, nil)
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index f762fa5a6c..0d957ad848 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -87,6 +87,33 @@ func TestCPUProfileMultithreaded(t *testing.T) {
})
}
+func TestCPUProfileInlining(t *testing.T) {
+ testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) {
+ cpuHogger(inlinedCaller, dur)
+ })
+}
+
+func inlinedCaller() {
+ inlinedCallee()
+}
+
+func inlinedCallee() {
+ // We could just use cpuHog1, but for loops prevent inlining
+ // right now. :(
+ foo := salt1
+ i := 0
+loop:
+ if foo > 0 {
+ foo *= foo
+ } else {
+ foo *= foo + 1
+ }
+ if i++; i < 1e5 {
+ goto loop
+ }
+ salt1 = foo
+}
+
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
p, err := profile.Parse(bytes.NewReader(valBytes))
if err != nil {
diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go
index 5210706f4e..9e16e580ee 100644
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@@ -43,7 +43,7 @@ type profileBuilder struct {
strings []string
stringMap map[string]int
locs map[uintptr]int
- funcs map[*runtime.Func]int
+ funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
}
@@ -183,24 +183,60 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
}
// locForPC returns the location ID for addr.
-// addr must be a call address (not a return address).
+// addr must be a return PC. This returns the location of the call.
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) locForPC(addr uintptr) uint64 {
id := uint64(b.locs[addr])
if id != 0 {
return id
}
- f := runtime.FuncForPC(addr)
- if f != nil && f.Name() == "runtime.goexit" {
+
+ // Expand this one address using CallersFrames so we can cache
+ // each expansion. In general, CallersFrames takes a whole
+ // stack, but in this case we know there will be no skips in
+ // the stack and we have return PCs anyway.
+ frames := runtime.CallersFrames([]uintptr{addr})
+ frame, more := frames.Next()
+ if frame.Function == "runtime.goexit" {
+ // Short-circuit if we see runtime.goexit so the loop
+ // below doesn't allocate a useless empty location.
return 0
}
- funcID, lineno := b.funcForPC(addr)
+
+ if frame.PC == 0 {
+ // If we failed to resolve the frame, at least make up
+ // a reasonable call PC. This mostly happens in tests.
+ frame.PC = addr - 1
+ }
+
+ // We can't write out functions while in the middle of the
+ // Location message, so record new functions we encounter and
+ // write them out after the Location.
+ type newFunc struct {
+ id uint64
+ name, file string
+ }
+ newFuncs := make([]newFunc, 0, 8)
+
id = uint64(len(b.locs)) + 1
b.locs[addr] = int(id)
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
- b.pb.uint64Opt(tagLocation_Address, uint64(addr))
- b.pbLine(tagLocation_Line, funcID, int64(lineno))
+ b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
+ for frame.Function != "runtime.goexit" {
+ // Write out each line in frame expansion.
+ funcID := uint64(b.funcs[frame.Function])
+ if funcID == 0 {
+ funcID = uint64(len(b.funcs)) + 1
+ b.funcs[frame.Function] = int(funcID)
+ newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+ }
+ b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+ if !more {
+ break
+ }
+ frame, more = frames.Next()
+ }
if len(b.mem) > 0 {
i := sort.Search(len(b.mem), func(i int) bool {
return b.mem[i].end > addr
@@ -210,34 +246,19 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
}
}
b.pb.endMessage(tagProfile_Location, start)
- b.flush()
- return id
-}
-// funcForPC returns the func ID and line number for addr.
-// It may emit to b.pb, so there must be no message encoding in progress.
-func (b *profileBuilder) funcForPC(addr uintptr) (funcID uint64, lineno int) {
- f := runtime.FuncForPC(addr)
- if f == nil {
- return 0, 0
- }
- file, lineno := f.FileLine(addr)
- funcID = uint64(b.funcs[f])
- if funcID != 0 {
- return funcID, lineno
+ // Write out functions we found during frame expansion.
+ for _, fn := range newFuncs {
+ start := b.pb.startMessage()
+ b.pb.uint64Opt(tagFunction_ID, fn.id)
+ b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+ b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+ b.pb.endMessage(tagProfile_Function, start)
}
- funcID = uint64(len(b.funcs)) + 1
- b.funcs[f] = int(funcID)
- name := f.Name()
- start := b.pb.startMessage()
- b.pb.uint64Opt(tagFunction_ID, funcID)
- b.pb.int64Opt(tagFunction_Name, b.stringIndex(name))
- b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(name))
- b.pb.int64Opt(tagFunction_Filename, b.stringIndex(file))
- b.pb.endMessage(tagProfile_Function, start)
b.flush()
- return funcID, lineno
+ return id
}
// newProfileBuilder returns a new profileBuilder.
@@ -253,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
strings: []string{""},
stringMap: map[string]int{"": 0},
locs: map[uintptr]int{},
- funcs: map[*runtime.Func]int{},
+ funcs: map[string]int{},
}
b.readMapping()
return b
@@ -351,11 +372,14 @@ func (b *profileBuilder) build() error {
locs = locs[:0]
for i, addr := range e.stk {
- // Addresses from stack traces point to the next instruction after
- // each call. Adjust by -1 to land somewhere on the actual call
- // (except for the leaf, which is not a call).
- if i > 0 {
- addr--
+ // Addresses from stack traces point to the
+ // next instruction after each call, except
+ // for the leaf, which points to where the
+ // signal occurred. locForPC expects return
+ // PCs, so increment the leaf address to look
+ // like a return PC.
+ if i == 0 {
+ addr++
}
l := b.locForPC(addr)
if l == 0 { // runtime.goexit
diff --git a/src/runtime/pprof/protomem.go b/src/runtime/pprof/protomem.go
index 86c7dacfe6..2756cfd28d 100644
--- a/src/runtime/pprof/protomem.go
+++ b/src/runtime/pprof/protomem.go
@@ -29,9 +29,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error
for tries := 0; tries < 2; tries++ {
for _, addr := range r.Stack() {
// For heap profiles, all stack
- // addresses are return PCs. Adjust
- // them to be call PCs for locForPC.
- addr--
+ // addresses are return PCs, which is
+ // what locForPC expects.
if hideRuntime {
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
continue