diff options
| author | Michael Anthony Knyszek <mknyszek@google.com> | 2023-05-11 21:09:10 +0000 |
|---|---|---|
| committer | Michael Knyszek <mknyszek@google.com> | 2023-05-19 17:06:45 +0000 |
| commit | b1aadd034c1feb6ac8409aca5f0efd10ef442950 (patch) | |
| tree | 4872646599671579df0c82627204881bebd087e2 /src/internal | |
| parent | 944911af5630bec413237b9aba010661a353953e (diff) | |
| download | go-b1aadd034c1feb6ac8409aca5f0efd10ef442950.tar.xz | |
runtime: emit STW events for all pauses, not just those for the GC
Currently STW events are only emitted for GC STWs. There's little reason
why the trace can't contain events for every STW: they're rare so don't
take up much space in the trace, yet being able to see when the world
was stopped is often critical to debugging certain latency issues,
especially when they stem from user-level APIs.
This change adds new "kinds" to the EvGCSTWStart event, renames the
GCSTW events to just "STW," and lets the parser deal with unknown STW
kinds for future backwards compatibility.
But, this change must break trace compatibility, so it bumps the trace
version to Go 1.21.
This change also includes a small cleanup in the trace command, which
previously checked for STW events when deciding whether user tasks
overlapped with a GC. Looking at the source, I don't see a way for STW
events to ever enter the stream that that code looks at, so that
condition has been deleted.
Change-Id: I9a5dc144092c53e92eb6950e9a5504a790ac00cf
Reviewed-on: https://go-review.googlesource.com/c/go/+/494495
Reviewed-by: Michael Pratt <mpratt@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/internal')
| -rw-r--r-- | src/internal/trace/gc.go | 5 | ||||
| -rw-r--r-- | src/internal/trace/parser.go | 72 | ||||
| -rw-r--r-- | src/internal/trace/parser_test.go | 15 | ||||
| -rw-r--r-- | src/internal/trace/testdata/http_1_21_good | bin | 0 -> 6744 bytes | |||
| -rw-r--r-- | src/internal/trace/testdata/stress_1_21_good | bin | 0 -> 353725 bytes | |||
| -rw-r--r-- | src/internal/trace/testdata/stress_start_stop_1_21_good | bin | 0 -> 5002 bytes | |||
| -rw-r--r-- | src/internal/trace/testdata/user_task_region_1_21_good | bin | 0 -> 2404 bytes |
7 files changed, 69 insertions, 23 deletions
diff --git a/src/internal/trace/gc.go b/src/internal/trace/gc.go index c1bc862340..3bd284e200 100644 --- a/src/internal/trace/gc.go +++ b/src/internal/trace/gc.go @@ -27,6 +27,7 @@ type UtilFlags int const ( // UtilSTW means utilization should account for STW events. + // This includes non-GC STW events, which are typically user-requested. UtilSTW UtilFlags = 1 << iota // UtilBackground means utilization should account for // background mark workers. @@ -93,11 +94,11 @@ func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil { } ps = append(ps, perP{series: series}) } - case EvGCSTWStart: + case EvSTWStart: if flags&UtilSTW != 0 { stw++ } - case EvGCSTWDone: + case EvSTWDone: if flags&UtilSTW != 0 { stw-- } diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go index 0376e914b1..67fa60b8fb 100644 --- a/src/internal/trace/parser.go +++ b/src/internal/trace/parser.go @@ -151,7 +151,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri return } switch ver { - case 1005, 1007, 1008, 1009, 1010, 1011, 1019: + case 1005, 1007, 1008, 1009, 1010, 1011, 1019, 1021: // Note: When adding a new version, confirm that canned traces from the // old version are part of the test suite. Add them using mkcanned.bash. break @@ -420,18 +420,29 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even if raw.typ == EvGoStartLabel { e.SArgs = []string{strings[e.Args[2]]} } - case EvGCSTWStart: + case EvSTWStart: e.G = 0 - switch e.Args[0] { - case 0: - e.SArgs = []string{"mark termination"} - case 1: - e.SArgs = []string{"sweep termination"} - default: - err = fmt.Errorf("unknown STW kind %d", e.Args[0]) - return + if ver < 1021 { + switch e.Args[0] { + case 0: + e.SArgs = []string{"mark termination"} + case 1: + e.SArgs = []string{"sweep termination"} + default: + err = fmt.Errorf("unknown STW kind %d", e.Args[0]) + return + } + } else if ver == 1021 { + if kind := e.Args[0]; kind < uint64(len(stwReasonStringsGo121)) { + e.SArgs = []string{stwReasonStringsGo121[kind]} + } else { + e.SArgs = []string{"unknown"} + } + } else { + // Can't make any assumptions. + e.SArgs = []string{"unknown"} } - case EvGCStart, EvGCDone, EvGCSTWDone: + case EvGCStart, EvGCDone, EvSTWDone: e.G = 0 case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt, EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, @@ -653,20 +664,20 @@ func postProcessTrace(ver int, events []*Event) error { } evGC.Link = ev evGC = nil - case EvGCSTWStart: + case EvSTWStart: evp := &evSTW if ver < 1010 { - // Before 1.10, EvGCSTWStart was per-P. + // Before 1.10, EvSTWStart was per-P. evp = &p.evSTW } if *evp != nil { return fmt.Errorf("previous STW is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts) } *evp = ev - case EvGCSTWDone: + case EvSTWDone: evp := &evSTW if ver < 1010 { - // Before 1.10, EvGCSTWDone was per-P. + // Before 1.10, EvSTWDone was per-P. evp = &p.evSTW } if *evp == nil { @@ -1015,7 +1026,7 @@ func argNum(raw rawEvent, ver int) int { if ver < 1007 { narg-- // 1.7 added an additional seq arg } - case EvGCSTWStart: + case EvSTWStart: if ver < 1010 { narg-- // 1.10 added an argument } @@ -1038,8 +1049,8 @@ const ( EvProcStop = 6 // stop of P [timestamp] EvGCStart = 7 // GC start [timestamp, seq, stack id] EvGCDone = 8 // GC done [timestamp] - EvGCSTWStart = 9 // GC mark termination start [timestamp, kind] - EvGCSTWDone = 10 // GC mark termination done [timestamp] + EvSTWStart = 9 // GC mark termination start [timestamp, kind] + EvSTWDone = 10 // GC mark termination done [timestamp] EvGCSweepStart = 11 // GC sweep start [timestamp, stack id] EvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed] EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] @@ -1098,8 +1109,8 @@ var EventDescriptions = [EvCount]struct { EvProcStop: {"ProcStop", 1005, false, []string{}, nil}, EvGCStart: {"GCStart", 1005, true, []string{"seq"}, nil}, // in 1.5 format it was {} EvGCDone: {"GCDone", 1005, false, []string{}, nil}, - EvGCSTWStart: {"GCSTWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0}) - EvGCSTWDone: {"GCSTWDone", 1005, false, []string{}, nil}, + EvSTWStart: {"STWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0}) + EvSTWDone: {"STWDone", 1005, false, []string{}, nil}, EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}, nil}, EvGCSweepDone: {"GCSweepDone", 1005, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {} EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}, nil}, @@ -1140,3 +1151,24 @@ var EventDescriptions = [EvCount]struct { EvUserLog: {"UserLog", 1011, true, []string{"id", "keyid"}, []string{"category", "message"}}, EvCPUSample: {"CPUSample", 1019, true, []string{"ts", "p", "g"}, nil}, } + +// Copied from src/runtime/proc.go:stwReasonStrings in Go 1.21. +var stwReasonStringsGo121 = [...]string{ + "unknown", + "GC mark termination", + "GC sweep termination", + "write heap dump", + "goroutine profile", + "goroutine profile cleanup", + "all goroutines stack trace", + "read mem stats", + "AllThreadsSyscall", + "GOMAXPROCS", + "start trace", + "stop trace", + "CountPagesInUse (test)", + "ReadMetricsSlow (test)", + "ReadMemStatsSlow (test)", + "PageCachePagesLeaked (test)", + "ResetDebugLog (test)", +} diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go index cdab95a59e..fce660c289 100644 --- a/src/internal/trace/parser_test.go +++ b/src/internal/trace/parser_test.go @@ -52,12 +52,13 @@ func TestParseCanned(t *testing.T) { } // Instead of Parse that requires a proper binary name for old traces, // we use 'parse' that omits symbol lookup if an empty string is given. - _, _, err = parse(bytes.NewReader(data), "") + ver, res, err := parse(bytes.NewReader(data), "") switch { case strings.HasSuffix(f.Name(), "_good"): if err != nil { t.Errorf("failed to parse good trace %v: %v", f.Name(), err) } + checkTrace(t, ver, res) case strings.HasSuffix(f.Name(), "_unordered"): if err != ErrTimeOrder { t.Errorf("unordered trace is not detected %v: %v", f.Name(), err) @@ -68,6 +69,18 @@ func TestParseCanned(t *testing.T) { } } +// checkTrace walks over a good trace and makes a bunch of additional checks +// that may not cause the parser to outright fail. +func checkTrace(t *testing.T, ver int, res ParseResult) { + for _, ev := range res.Events { + if ver >= 1021 { + if ev.Type == EvSTWStart && ev.SArgs[0] == "unknown" { + t.Errorf("found unknown STW event; update stwReasonStrings?") + } + } + } +} + func TestParseVersion(t *testing.T) { tests := map[string]int{ "go 1.5 trace\x00\x00\x00\x00": 1005, diff --git a/src/internal/trace/testdata/http_1_21_good b/src/internal/trace/testdata/http_1_21_good Binary files differnew file mode 100644 index 0000000000..b3295f9e5d --- /dev/null +++ b/src/internal/trace/testdata/http_1_21_good diff --git a/src/internal/trace/testdata/stress_1_21_good b/src/internal/trace/testdata/stress_1_21_good Binary files differnew file mode 100644 index 0000000000..1ade5e0eb6 --- /dev/null +++ b/src/internal/trace/testdata/stress_1_21_good diff --git a/src/internal/trace/testdata/stress_start_stop_1_21_good b/src/internal/trace/testdata/stress_start_stop_1_21_good Binary files differnew file mode 100644 index 0000000000..fff46a9a07 --- /dev/null +++ b/src/internal/trace/testdata/stress_start_stop_1_21_good diff --git a/src/internal/trace/testdata/user_task_region_1_21_good b/src/internal/trace/testdata/user_task_region_1_21_good Binary files differnew file mode 100644 index 0000000000..5c01a6405d --- /dev/null +++ b/src/internal/trace/testdata/user_task_region_1_21_good |
