diff options
| author | Carlos Amedee <carlos@golang.org> | 2024-05-09 10:45:01 -0400 |
|---|---|---|
| committer | Carlos Amedee <carlos@golang.org> | 2024-05-17 18:48:18 +0000 |
| commit | 5890b023a549e7ba6b0c563cdf730a91c2de6fae (patch) | |
| tree | 098ead9c43c885cd3176d6584cc4e54b51253081 /src/internal/trace/trace_test.go | |
| parent | 192d65e46b38381653ccbe16cac49f7fa36aac93 (diff) | |
| download | go-5890b023a549e7ba6b0c563cdf730a91c2de6fae.tar.xz | |
internal/trace: move v2 tracer into trace directory
This change moves the v2 tracer into the trace directory.
Updates #67367
Change-Id: I3657b4227002cb00fdf29c797434800ea796715e
Reviewed-on: https://go-review.googlesource.com/c/go/+/584538
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/internal/trace/trace_test.go')
| -rw-r--r-- | src/internal/trace/trace_test.go | 629 |
1 file changed, 629 insertions, 0 deletions
diff --git a/src/internal/trace/trace_test.go b/src/internal/trace/trace_test.go new file mode 100644 index 0000000000..7dc5cbb89d --- /dev/null +++ b/src/internal/trace/trace_test.go @@ -0,0 +1,629 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package trace_test + +import ( + "bufio" + "bytes" + "fmt" + "internal/race" + "internal/testenv" + "internal/trace" + "internal/trace/testtrace" + "io" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestTraceAnnotations(t *testing.T) { + testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) { + type evDesc struct { + kind trace.EventKind + task trace.TaskID + args []string + } + want := []evDesc{ + {trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}}, + {trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}}, + {trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}}, + {trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}}, + {trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}}, + {trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}}, + {trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}}, + // Currently, pre-existing region is not recorded to avoid allocations. 
+ {trace.EventRegionBegin, trace.BackgroundTask, []string{"post-existing region"}}, + } + r, err := trace.NewReader(bytes.NewReader(tb)) + if err != nil { + t.Error(err) + } + for { + ev, err := r.ReadEvent() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + for i, wantEv := range want { + if wantEv.kind != ev.Kind() { + continue + } + match := false + switch ev.Kind() { + case trace.EventTaskBegin, trace.EventTaskEnd: + task := ev.Task() + match = task.ID == wantEv.task && task.Type == wantEv.args[0] + case trace.EventRegionBegin, trace.EventRegionEnd: + reg := ev.Region() + match = reg.Task == wantEv.task && reg.Type == wantEv.args[0] + case trace.EventLog: + log := ev.Log() + match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1] + } + if match { + want[i] = want[len(want)-1] + want = want[:len(want)-1] + break + } + } + } + if len(want) != 0 { + for _, ev := range want { + t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args) + } + } + }) +} + +func TestTraceAnnotationsStress(t *testing.T) { + testTraceProg(t, "annotations-stress.go", nil) +} + +func TestTraceCgoCallback(t *testing.T) { + testenv.MustHaveCGO(t) + + switch runtime.GOOS { + case "plan9", "windows": + t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS) + } + testTraceProg(t, "cgo-callback.go", nil) +} + +func TestTraceCPUProfile(t *testing.T) { + testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) { + // Parse stderr which has a CPU profile summary, if everything went well. + // (If it didn't, we shouldn't even make it here.) 
+ scanner := bufio.NewScanner(bytes.NewReader(stderr)) + pprofSamples := 0 + pprofStacks := make(map[string]int) + for scanner.Scan() { + var stack string + var samples int + _, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples) + if err != nil { + t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr) + } + pprofStacks[stack] = samples + pprofSamples += samples + } + if err := scanner.Err(); err != nil { + t.Fatalf("failed to parse CPU profile summary in stderr: %v", err) + } + if pprofSamples == 0 { + t.Skip("CPU profile did not include any samples while tracing was active") + } + + // Examine the execution tracer's view of the CPU profile samples. Filter it + // to only include samples from the single test goroutine. Use the goroutine + // ID that was recorded in the events: that should reflect getg().m.curg, + // same as the profiler's labels (even when the M is using its g0 stack). + totalTraceSamples := 0 + traceSamples := 0 + traceStacks := make(map[string]int) + r, err := trace.NewReader(bytes.NewReader(tb)) + if err != nil { + t.Error(err) + } + var hogRegion *trace.Event + var hogRegionClosed bool + for { + ev, err := r.ReadEvent() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" { + hogRegion = &ev + } + if ev.Kind() == trace.EventStackSample { + totalTraceSamples++ + if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() { + traceSamples++ + var fns []string + ev.Stack().Frames(func(frame trace.StackFrame) bool { + if frame.Func != "runtime.goexit" { + fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line)) + } + return true + }) + stack := strings.Join(fns, "|") + traceStacks[stack]++ + } + } + if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" { + hogRegionClosed = true + } + } + if hogRegion == nil { + t.Fatalf("execution trace did not identify cpuHogger 
goroutine") + } else if !hogRegionClosed { + t.Fatalf("execution trace did not close cpuHogger region") + } + + // The execution trace may drop CPU profile samples if the profiling buffer + // overflows. Based on the size of profBufWordCount, that takes a bit over + // 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've + // hit that case, then we definitely have at least one full buffer's worth + // of CPU samples, so we'll call that success. + overflowed := totalTraceSamples >= 1900 + if traceSamples < pprofSamples { + t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + + for stack, traceSamples := range traceStacks { + pprofSamples := pprofStacks[stack] + delete(pprofStacks, stack) + if traceSamples < pprofSamples { + t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace", + stack, pprofSamples, traceSamples) + if !overflowed { + t.Fail() + } + } + } + for stack, pprofSamples := range pprofStacks { + t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack) + if !overflowed { + t.Fail() + } + } + + if t.Failed() { + t.Logf("execution trace CPU samples:") + for stack, samples := range traceStacks { + t.Logf("%d: %q", samples, stack) + } + t.Logf("CPU profile:\n%s", stderr) + } + }) +} + +func TestTraceFutileWakeup(t *testing.T) { + testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) { + // Check to make sure that no goroutine in the "special" trace region + // ends up blocking, unblocking, then immediately blocking again. + // + // The goroutines are careful to call runtime.Gosched in between blocking, + // so there should never be a clean block/unblock on the goroutine unless + // the runtime was generating extraneous events. 
+ const ( + entered = iota + blocked + runnable + running + ) + gs := make(map[trace.GoID]int) + seenSpecialGoroutines := false + r, err := trace.NewReader(bytes.NewReader(tb)) + if err != nil { + t.Error(err) + } + for { + ev, err := r.ReadEvent() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + // Only track goroutines in the special region we control, so runtime + // goroutines don't interfere (it's totally valid in traces for a + // goroutine to block, run, and block again; that's not what we care about). + if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" { + seenSpecialGoroutines = true + gs[ev.Goroutine()] = entered + } + if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" { + delete(gs, ev.Goroutine()) + } + // Track state transitions for goroutines we care about. + // + // The goroutines we care about will advance through the state machine + // of entered -> blocked -> runnable -> running. If in the running state + // we block, then we have a futile wakeup. Because of the runtime.Gosched + // on these specially marked goroutines, we should end up back in runnable + // first. If at any point we go to a different state, switch back to entered + // and wait for the next time the goroutine blocks. 
+ if ev.Kind() != trace.EventStateTransition { + continue + } + st := ev.StateTransition() + if st.Resource.Kind != trace.ResourceGoroutine { + continue + } + id := st.Resource.Goroutine() + state, ok := gs[id] + if !ok { + continue + } + _, new := st.Goroutine() + switch state { + case entered: + if new == trace.GoWaiting { + state = blocked + } else { + state = entered + } + case blocked: + if new == trace.GoRunnable { + state = runnable + } else { + state = entered + } + case runnable: + if new == trace.GoRunning { + state = running + } else { + state = entered + } + case running: + if new == trace.GoWaiting { + t.Fatalf("found futile wakeup on goroutine %d", id) + } else { + state = entered + } + } + gs[id] = state + } + if !seenSpecialGoroutines { + t.Fatal("did not see a goroutine in a the region 'special'") + } + }) +} + +func TestTraceGCStress(t *testing.T) { + testTraceProg(t, "gc-stress.go", nil) +} + +func TestTraceGOMAXPROCS(t *testing.T) { + testTraceProg(t, "gomaxprocs.go", nil) +} + +func TestTraceStacks(t *testing.T) { + testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) { + type frame struct { + fn string + line int + } + type evDesc struct { + kind trace.EventKind + match string + frames []frame + } + // mainLine is the line number of `func main()` in testprog/stacks.go. 
+ const mainLine = 21 + want := []evDesc{ + {trace.EventStateTransition, "Goroutine Running->Runnable", []frame{ + {"main.main", mainLine + 82}, + }}, + {trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{ + {"main.main", mainLine + 11}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.block", 0}, + {"main.main.func1", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.chansend1", 0}, + {"main.main.func2", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.chanrecv1", 0}, + {"main.main.func3", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.chanrecv1", 0}, + {"main.main.func4", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"runtime.chansend1", 0}, + {"main.main", mainLine + 84}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.chansend1", 0}, + {"main.main.func5", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"runtime.chanrecv1", 0}, + {"main.main", mainLine + 85}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"runtime.selectgo", 0}, + {"main.main.func6", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"runtime.selectgo", 0}, + {"main.main", mainLine + 86}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"sync.(*Mutex).Lock", 0}, + {"main.main.func7", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"sync.(*Mutex).Unlock", 0}, + {"main.main", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"sync.(*WaitGroup).Wait", 0}, + {"main.main.func8", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"sync.(*WaitGroup).Add", 0}, + {"sync.(*WaitGroup).Done", 0}, + {"main.main", 
mainLine + 91}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"sync.(*Cond).Wait", 0}, + {"main.main.func9", 0}, + }}, + {trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{ + {"sync.(*Cond).Signal", 0}, + {"main.main", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"time.Sleep", 0}, + {"main.main", 0}, + }}, + {trace.EventMetric, "/sched/gomaxprocs:threads", []frame{ + {"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged. + {"runtime.startTheWorldGC", 0}, + {"runtime.GOMAXPROCS", 0}, + {"main.main", 0}, + }}, + } + if !stress { + // Only check for this stack if !stress because traceAdvance alone could + // allocate enough memory to trigger a GC if called frequently enough. + // This might cause the runtime.GC call we're trying to match against to + // coalesce with an active GC triggered this by traceAdvance. In that case + // we won't have an EventRangeBegin event that matches the stace trace we're + // looking for, since runtime.GC will not have triggered the GC. + gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{ + {"runtime.GC", 0}, + {"main.main", 0}, + }} + want = append(want, gcEv) + } + if runtime.GOOS != "windows" && runtime.GOOS != "plan9" { + want = append(want, []evDesc{ + {trace.EventStateTransition, "Goroutine Running->Waiting", []frame{ + {"internal/poll.(*FD).Accept", 0}, + {"net.(*netFD).accept", 0}, + {"net.(*TCPListener).accept", 0}, + {"net.(*TCPListener).Accept", 0}, + {"main.main.func10", 0}, + }}, + {trace.EventStateTransition, "Goroutine Running->Syscall", []frame{ + {"syscall.read", 0}, + {"syscall.Read", 0}, + {"internal/poll.ignoringEINTRIO", 0}, + {"internal/poll.(*FD).Read", 0}, + {"os.(*File).read", 0}, + {"os.(*File).Read", 0}, + {"main.main.func11", 0}, + }}, + }...) 
+ } + stackMatches := func(stk trace.Stack, frames []frame) bool { + i := 0 + match := true + stk.Frames(func(f trace.StackFrame) bool { + if f.Func != frames[i].fn { + match = false + return false + } + if line := uint64(frames[i].line); line != 0 && line != f.Line { + match = false + return false + } + i++ + return true + }) + return match + } + r, err := trace.NewReader(bytes.NewReader(tb)) + if err != nil { + t.Error(err) + } + for { + ev, err := r.ReadEvent() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + for i, wantEv := range want { + if wantEv.kind != ev.Kind() { + continue + } + match := false + switch ev.Kind() { + case trace.EventStateTransition: + st := ev.StateTransition() + str := "" + switch st.Resource.Kind { + case trace.ResourceGoroutine: + old, new := st.Goroutine() + str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new) + } + match = str == wantEv.match + case trace.EventRangeBegin: + rng := ev.Range() + match = rng.Name == wantEv.match + case trace.EventMetric: + metric := ev.Metric() + match = metric.Name == wantEv.match + } + match = match && stackMatches(ev.Stack(), wantEv.frames) + if match { + want[i] = want[len(want)-1] + want = want[:len(want)-1] + break + } + } + } + if len(want) != 0 { + for _, ev := range want { + t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames) + } + } + }) +} + +func TestTraceStress(t *testing.T) { + switch runtime.GOOS { + case "js", "wasip1": + t.Skip("no os.Pipe on " + runtime.GOOS) + } + testTraceProg(t, "stress.go", nil) +} + +func TestTraceStressStartStop(t *testing.T) { + switch runtime.GOOS { + case "js", "wasip1": + t.Skip("no os.Pipe on " + runtime.GOOS) + } + testTraceProg(t, "stress-start-stop.go", nil) +} + +func TestTraceManyStartStop(t *testing.T) { + testTraceProg(t, "many-start-stop.go", nil) +} + +func TestTraceWaitOnPipe(t *testing.T) { + switch runtime.GOOS { + case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris": + 
testTraceProg(t, "wait-on-pipe.go", nil) + return + } + t.Skip("no applicable syscall.Pipe on " + runtime.GOOS) +} + +func TestTraceIterPull(t *testing.T) { + testTraceProg(t, "iter-pull.go", nil) +} + +func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) { + testenv.MustHaveGoRun(t) + + // Check if we're on a builder. + onBuilder := testenv.Builder() != "" + onOldBuilder := !strings.Contains(testenv.Builder(), "gotip") && !strings.Contains(testenv.Builder(), "go1") + + testPath := filepath.Join("./testdata/testprog", progName) + testName := progName + runTest := func(t *testing.T, stress bool, extraGODEBUG string) { + // Run the program and capture the trace, which is always written to stdout. + cmd := testenv.Command(t, testenv.GoToolPath(t), "run") + if race.Enabled { + cmd.Args = append(cmd.Args, "-race") + } + cmd.Args = append(cmd.Args, testPath) + cmd.Env = append(os.Environ(), "GOEXPERIMENT=rangefunc") + // Add a stack ownership check. This is cheap enough for testing. + godebug := "tracecheckstackownership=1" + if stress { + // Advance a generation constantly to stress the tracer. + godebug += ",traceadvanceperiod=0" + } + if extraGODEBUG != "" { + // Add extra GODEBUG flags. + godebug += "," + extraGODEBUG + } + cmd.Env = append(cmd.Env, "GODEBUG="+godebug) + + // Capture stdout and stderr. + // + // The protocol for these programs is that stdout contains the trace data + // and stderr is an expectation in string format. + var traceBuf, errBuf bytes.Buffer + cmd.Stdout = &traceBuf + cmd.Stderr = &errBuf + // Run the program. + if err := cmd.Run(); err != nil { + if errBuf.Len() != 0 { + t.Logf("stderr: %s", string(errBuf.Bytes())) + } + t.Fatal(err) + } + tb := traceBuf.Bytes() + + // Test the trace and the parser. + testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess()) + + // Run some extra validation. 
+ if !t.Failed() && extra != nil { + extra(t, tb, errBuf.Bytes(), stress) + } + + // Dump some more information on failure. + if t.Failed() && onBuilder { + // Dump directly to the test log on the builder, since this + // data is critical for debugging and this is the only way + // we can currently make sure it's retained. + t.Log("found bad trace; dumping to test log...") + s := dumpTraceToText(t, tb) + if onOldBuilder && len(s) > 1<<20+512<<10 { + // The old build infrastructure truncates logs at ~2 MiB. + // Let's assume we're the only failure and give ourselves + // up to 1.5 MiB to dump the trace. + // + // TODO(mknyszek): Remove this when we've migrated off of + // the old infrastructure. + t.Logf("text trace too large to dump (%d bytes)", len(s)) + } else { + t.Log(s) + } + } else if t.Failed() || *dumpTraces { + // We asked to dump the trace or failed. Write the trace to a file. + t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb)) + } + } + t.Run("Default", func(t *testing.T) { + runTest(t, false, "") + }) + t.Run("Stress", func(t *testing.T) { + if testing.Short() { + t.Skip("skipping trace stress tests in short mode") + } + runTest(t, true, "") + }) + t.Run("AllocFree", func(t *testing.T) { + if testing.Short() { + t.Skip("skipping trace alloc/free tests in short mode") + } + runTest(t, false, "traceallocfree=1") + }) +} |
