runtime: add CPU samples to execution trace

When the CPU profiler and execution tracer are both active, report the CPU profile samples in the execution trace data stream. Include only samples that arrive on the threads known to the runtime, but include them even when running g0 (such as near the scheduler) or if there's no P (such as near syscalls). Render them in "go tool trace" as instantaneous events. For #16895 Change-Id: I0aa501a7b450c971e510961c0290838729033f7f Reviewed-on: https://go-review.googlesource.com/c/go/+/400795 Reviewed-by: Michael Knyszek <mknyszek@google.com> Run-TryBot: Rhys Hiltner <rhys@justin.tv> Reviewed-by: David Chase <drchase@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
author: Rhys Hiltner <rhys@justin.tv> 2022-04-18 12:32:37 -0700
committer: Michael Knyszek <mknyszek@google.com> 2022-05-03 20:49:46 +0000
commit: 2c0a9884e0dc930c1a3596bc1decf183c8fdcf77 (patch)
tree: 33fedac2be0e9e3b8de4cb652d833a84b7889328 /src/runtime/trace
parent: 52bd1c4d6cc691aa60c71513695dba03062deb59 (diff)
download: go-2c0a9884e0dc930c1a3596bc1decf183c8fdcf77.tar.xz
2 files changed, 156 insertions, 0 deletions
diff --git a/src/runtime/trace/trace.go b/src/runtime/trace/trace.go
index e0c3ca7a1e..cf2b6440b2 100644
--- a/src/runtime/trace/trace.go
+++ b/src/runtime/trace/trace.go
@@ -10,6 +10,8 @@
 // The execution trace captures a wide range of execution events such as
 // goroutine creation/blocking/unblocking, syscall enter/exit/block,
 // GC-related events, changes of heap size, processor start/stop, etc.
+// When CPU profiling is active, the execution tracer makes an effort to
+// include those samples as well.
 // A precise nanosecond-precision timestamp and a stack trace is
 // captured for most events. The generated trace can be interpreted
 // using `go tool trace`.
diff --git a/src/runtime/trace/trace_test.go b/src/runtime/trace/trace_test.go
index b316eafe4c..52a43929b7 100644
--- a/src/runtime/trace/trace_test.go
+++ b/src/runtime/trace/trace_test.go
@@ -6,15 +6,20 @@ package trace_test
 
 import (
 	"bytes"
+	"context"
 	"flag"
+	"fmt"
+	"internal/profile"
 	"internal/race"
 	"internal/trace"
 	"io"
 	"net"
 	"os"
 	"runtime"
+	"runtime/pprof"
 	. "runtime/trace"
 	"strconv"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -581,6 +586,155 @@ func TestTraceFutileWakeup(t *testing.T) {
 	}
 }
 
+func TestTraceCPUProfile(t *testing.T) {
+	if IsEnabled() {
+		t.Skip("skipping because -test.trace is set")
+	}
+
+	cpuBuf := new(bytes.Buffer)
+	if err := pprof.StartCPUProfile(cpuBuf); err != nil {
+		t.Skipf("failed to start CPU profile: %v", err)
+	}
+
+	buf := new(bytes.Buffer)
+	if err := Start(buf); err != nil {
+		t.Fatalf("failed to start tracing: %v", err)
+	}
+
+	dur := 100 * time.Millisecond
+	func() {
+		// Create a region in the execution trace. Set and clear goroutine
+		// labels fully within that region, so we know that any CPU profile
+		// sample with the label must also be eligible for inclusion in the
+		// execution trace.
+		ctx := context.Background()
+		defer StartRegion(ctx, "cpuHogger").End()
+		pprof.Do(ctx, pprof.Labels("tracing", "on"), func(ctx context.Context) {
+			cpuHogger(cpuHog1, &salt1, dur)
+		})
+		// Be sure the execution trace's view, when filtered to this goroutine,
+		// gets many more samples than the CPU profiler when filtered by label.
+		cpuHogger(cpuHog1, &salt1, dur)
+	}()
+
+	Stop()
+	pprof.StopCPUProfile()
+	saveTrace(t, buf, "TestTraceCPUProfile")
+
+	prof, err := profile.Parse(cpuBuf)
+	if err != nil {
+		t.Fatalf("failed to parse CPU profile: %v", err)
+	}
+	pprofSamples := 0
+	pprofStacks := make(map[string]int) // CPU profiler's view, filtered to include the label
+	for _, s := range prof.Sample {
+		if s.Label["tracing"] != nil {
+			samples := int(s.Value[0])
+			pprofSamples += samples
+			var fns []string
+			for _, loc := range s.Location {
+				for _, line := range loc.Line {
+					fns = append(fns, fmt.Sprintf("%s:%d", line.Function.Name, line.Line))
+				}
+			}
+			stack := strings.Join(fns, " ")
+			pprofStacks[stack] += samples
+		}
+	}
+	if pprofSamples == 0 {
+		t.Skipf("CPU profile did not include any samples while tracing was active\n%s", prof)
+	}
+
+	traceSamples := 0
+	traceStacks := make(map[string]int) // Execution tracer's view, filtered to this goroutine
+	events, _ := parseTrace(t, buf)
+	var hogRegion *trace.Event
+	for _, ev := range events {
+		if ev.Type == trace.EvUserRegion && ev.Args[1] == 0 && ev.SArgs[0] == "cpuHogger" {
+			// mode "0" indicates region start
+			hogRegion = ev
+		}
+	}
+	if hogRegion == nil {
+		t.Fatalf("execution trace did not identify cpuHogger goroutine")
+	} else if hogRegion.Link == nil {
+		t.Fatalf("execution trace did not close cpuHogger region")
+	}
+	for _, ev := range events {
+		if ev.Type == trace.EvCPUSample && ev.G == hogRegion.G {
+			traceSamples++
+			var fns []string
+			for _, frame := range ev.Stk {
+				if frame.Fn != "runtime.goexit" {
+					fns = append(fns, fmt.Sprintf("%s:%d", frame.Fn, frame.Line))
+				}
+			}
+			stack := strings.Join(fns, " ")
+			traceStacks[stack]++
+		}
+	}
+	if traceSamples < pprofSamples {
+		t.Errorf("exectution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
+	}
+
+	for stack, traceSamples := range traceStacks {
+		pprofSamples := pprofStacks[stack]
+		delete(pprofStacks, stack)
+		if traceSamples < pprofSamples {
+			t.Errorf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
+				stack, pprofSamples, traceSamples)
+		}
+	}
+	for stack, pprofSamples := range pprofStacks {
+		t.Errorf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
+	}
+
+	if t.Failed() {
+		t.Logf("execution trace CPU samples:")
+		for stack, samples := range traceStacks {
+			t.Logf("%d: %q", samples, stack)
+		}
+		t.Logf("CPU profile:\n%v", prof)
+	}
+}
+
+func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
+	// We only need to get one 100 Hz clock tick, so we've got
+	// a large safety buffer.
+	// But do at least 500 iterations (which should take about 100ms),
+	// otherwise TestCPUProfileMultithreaded can fail if only one
+	// thread is scheduled during the testing period.
+	t0 := time.Now()
+	accum := *y
+	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
+		accum = f(accum)
+	}
+	*y = accum
+}
+
+var (
+	salt1 = 0
+)
+
+// The actual CPU hogging function.
+// Must not call other functions nor access heap/globals in the loop,
+// otherwise under race detector the samples will be in the race runtime.
+func cpuHog1(x int) int {
+	return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
+	foo := x
+	for i := 0; i < n; i++ {
+		if foo > 0 {
+			foo *= foo
+		} else {
+			foo *= foo + 1
+		}
+	}
+	return foo
+}
+
 func saveTrace(t *testing.T, buf *bytes.Buffer, name string) {
 	if !*saveTraces {
 		return
author	Rhys Hiltner <rhys@justin.tv>	2022-04-18 12:32:37 -0700
committer	Michael Knyszek <mknyszek@google.com>	2022-05-03 20:49:46 +0000
commit	2c0a9884e0dc930c1a3596bc1decf183c8fdcf77 (patch)
tree	33fedac2be0e9e3b8de4cb652d833a84b7889328 /src/runtime/trace
parent	52bd1c4d6cc691aa60c71513695dba03062deb59 (diff)
download	go-2c0a9884e0dc930c1a3596bc1decf183c8fdcf77.tar.xz