aboutsummaryrefslogtreecommitdiff
path: root/src/internal
diff options
context:
space:
mode:
authorMichael Anthony Knyszek <mknyszek@google.com>2023-05-11 21:09:10 +0000
committerMichael Knyszek <mknyszek@google.com>2023-05-19 17:06:45 +0000
commitb1aadd034c1feb6ac8409aca5f0efd10ef442950 (patch)
tree4872646599671579df0c82627204881bebd087e2 /src/internal
parent944911af5630bec413237b9aba010661a353953e (diff)
downloadgo-b1aadd034c1feb6ac8409aca5f0efd10ef442950.tar.xz
runtime: emit STW events for all pauses, not just those for the GC
Currently STW events are only emitted for GC STWs. There's little reason why the trace can't contain events for every STW: they're rare so don't take up much space in the trace, yet being able to see when the world was stopped is often critical to debugging certain latency issues, especially when they stem from user-level APIs. This change adds new "kinds" to the EvGCSTWStart event, renames the GCSTW events to just "STW," and lets the parser deal with unknown STW kinds for future backwards compatibility. But, this change must break trace compatibility, so it bumps the trace version to Go 1.21. This change also includes a small cleanup in the trace command, which previously checked for STW events when deciding whether user tasks overlapped with a GC. Looking at the source, I don't see a way for STW events to ever enter the stream that that code looks at, so that condition has been deleted. Change-Id: I9a5dc144092c53e92eb6950e9a5504a790ac00cf Reviewed-on: https://go-review.googlesource.com/c/go/+/494495 Reviewed-by: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/internal')
-rw-r--r--src/internal/trace/gc.go5
-rw-r--r--src/internal/trace/parser.go72
-rw-r--r--src/internal/trace/parser_test.go15
-rw-r--r--src/internal/trace/testdata/http_1_21_goodbin0 -> 6744 bytes
-rw-r--r--src/internal/trace/testdata/stress_1_21_goodbin0 -> 353725 bytes
-rw-r--r--src/internal/trace/testdata/stress_start_stop_1_21_goodbin0 -> 5002 bytes
-rw-r--r--src/internal/trace/testdata/user_task_region_1_21_goodbin0 -> 2404 bytes
7 files changed, 69 insertions, 23 deletions
diff --git a/src/internal/trace/gc.go b/src/internal/trace/gc.go
index c1bc862340..3bd284e200 100644
--- a/src/internal/trace/gc.go
+++ b/src/internal/trace/gc.go
@@ -27,6 +27,7 @@ type UtilFlags int
const (
// UtilSTW means utilization should account for STW events.
+ // This includes non-GC STW events, which are typically user-requested.
UtilSTW UtilFlags = 1 << iota
// UtilBackground means utilization should account for
// background mark workers.
@@ -93,11 +94,11 @@ func MutatorUtilization(events []*Event, flags UtilFlags) [][]MutatorUtil {
}
ps = append(ps, perP{series: series})
}
- case EvGCSTWStart:
+ case EvSTWStart:
if flags&UtilSTW != 0 {
stw++
}
- case EvGCSTWDone:
+ case EvSTWDone:
if flags&UtilSTW != 0 {
stw--
}
diff --git a/src/internal/trace/parser.go b/src/internal/trace/parser.go
index 0376e914b1..67fa60b8fb 100644
--- a/src/internal/trace/parser.go
+++ b/src/internal/trace/parser.go
@@ -151,7 +151,7 @@ func readTrace(r io.Reader) (ver int, events []rawEvent, strings map[uint64]stri
return
}
switch ver {
- case 1005, 1007, 1008, 1009, 1010, 1011, 1019:
+ case 1005, 1007, 1008, 1009, 1010, 1011, 1019, 1021:
// Note: When adding a new version, confirm that canned traces from the
// old version are part of the test suite. Add them using mkcanned.bash.
break
@@ -420,18 +420,29 @@ func parseEvents(ver int, rawEvents []rawEvent, strings map[uint64]string) (even
if raw.typ == EvGoStartLabel {
e.SArgs = []string{strings[e.Args[2]]}
}
- case EvGCSTWStart:
+ case EvSTWStart:
e.G = 0
- switch e.Args[0] {
- case 0:
- e.SArgs = []string{"mark termination"}
- case 1:
- e.SArgs = []string{"sweep termination"}
- default:
- err = fmt.Errorf("unknown STW kind %d", e.Args[0])
- return
+ if ver < 1021 {
+ switch e.Args[0] {
+ case 0:
+ e.SArgs = []string{"mark termination"}
+ case 1:
+ e.SArgs = []string{"sweep termination"}
+ default:
+ err = fmt.Errorf("unknown STW kind %d", e.Args[0])
+ return
+ }
+ } else if ver == 1021 {
+ if kind := e.Args[0]; kind < uint64(len(stwReasonStringsGo121)) {
+ e.SArgs = []string{stwReasonStringsGo121[kind]}
+ } else {
+ e.SArgs = []string{"unknown"}
+ }
+ } else {
+ // Can't make any assumptions.
+ e.SArgs = []string{"unknown"}
}
- case EvGCStart, EvGCDone, EvGCSTWDone:
+ case EvGCStart, EvGCDone, EvSTWDone:
e.G = 0
case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
@@ -653,20 +664,20 @@ func postProcessTrace(ver int, events []*Event) error {
}
evGC.Link = ev
evGC = nil
- case EvGCSTWStart:
+ case EvSTWStart:
evp := &evSTW
if ver < 1010 {
- // Before 1.10, EvGCSTWStart was per-P.
+ // Before 1.10, EvSTWStart was per-P.
evp = &p.evSTW
}
if *evp != nil {
return fmt.Errorf("previous STW is not ended before a new one (offset %v, time %v)", ev.Off, ev.Ts)
}
*evp = ev
- case EvGCSTWDone:
+ case EvSTWDone:
evp := &evSTW
if ver < 1010 {
- // Before 1.10, EvGCSTWDone was per-P.
+ // Before 1.10, EvSTWDone was per-P.
evp = &p.evSTW
}
if *evp == nil {
@@ -1015,7 +1026,7 @@ func argNum(raw rawEvent, ver int) int {
if ver < 1007 {
narg-- // 1.7 added an additional seq arg
}
- case EvGCSTWStart:
+ case EvSTWStart:
if ver < 1010 {
narg-- // 1.10 added an argument
}
@@ -1038,8 +1049,8 @@ const (
EvProcStop = 6 // stop of P [timestamp]
EvGCStart = 7 // GC start [timestamp, seq, stack id]
EvGCDone = 8 // GC done [timestamp]
- EvGCSTWStart = 9 // GC mark termination start [timestamp, kind]
- EvGCSTWDone = 10 // GC mark termination done [timestamp]
+ EvSTWStart = 9 // GC mark termination start [timestamp, kind]
+ EvSTWDone = 10 // GC mark termination done [timestamp]
EvGCSweepStart = 11 // GC sweep start [timestamp, stack id]
EvGCSweepDone = 12 // GC sweep done [timestamp, swept, reclaimed]
EvGoCreate = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
@@ -1098,8 +1109,8 @@ var EventDescriptions = [EvCount]struct {
EvProcStop: {"ProcStop", 1005, false, []string{}, nil},
EvGCStart: {"GCStart", 1005, true, []string{"seq"}, nil}, // in 1.5 format it was {}
EvGCDone: {"GCDone", 1005, false, []string{}, nil},
- EvGCSTWStart: {"GCSTWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
- EvGCSTWDone: {"GCSTWDone", 1005, false, []string{}, nil},
+ EvSTWStart: {"STWStart", 1005, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
+ EvSTWDone: {"STWDone", 1005, false, []string{}, nil},
EvGCSweepStart: {"GCSweepStart", 1005, true, []string{}, nil},
EvGCSweepDone: {"GCSweepDone", 1005, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
EvGoCreate: {"GoCreate", 1005, true, []string{"g", "stack"}, nil},
@@ -1140,3 +1151,24 @@ var EventDescriptions = [EvCount]struct {
EvUserLog: {"UserLog", 1011, true, []string{"id", "keyid"}, []string{"category", "message"}},
EvCPUSample: {"CPUSample", 1019, true, []string{"ts", "p", "g"}, nil},
}
+
+// Copied from src/runtime/proc.go:stwReasonStrings in Go 1.21.
+var stwReasonStringsGo121 = [...]string{
+ "unknown",
+ "GC mark termination",
+ "GC sweep termination",
+ "write heap dump",
+ "goroutine profile",
+ "goroutine profile cleanup",
+ "all goroutines stack trace",
+ "read mem stats",
+ "AllThreadsSyscall",
+ "GOMAXPROCS",
+ "start trace",
+ "stop trace",
+ "CountPagesInUse (test)",
+ "ReadMetricsSlow (test)",
+ "ReadMemStatsSlow (test)",
+ "PageCachePagesLeaked (test)",
+ "ResetDebugLog (test)",
+}
diff --git a/src/internal/trace/parser_test.go b/src/internal/trace/parser_test.go
index cdab95a59e..fce660c289 100644
--- a/src/internal/trace/parser_test.go
+++ b/src/internal/trace/parser_test.go
@@ -52,12 +52,13 @@ func TestParseCanned(t *testing.T) {
}
// Instead of Parse that requires a proper binary name for old traces,
// we use 'parse' that omits symbol lookup if an empty string is given.
- _, _, err = parse(bytes.NewReader(data), "")
+ ver, res, err := parse(bytes.NewReader(data), "")
switch {
case strings.HasSuffix(f.Name(), "_good"):
if err != nil {
t.Errorf("failed to parse good trace %v: %v", f.Name(), err)
}
+ checkTrace(t, ver, res)
case strings.HasSuffix(f.Name(), "_unordered"):
if err != ErrTimeOrder {
t.Errorf("unordered trace is not detected %v: %v", f.Name(), err)
@@ -68,6 +69,18 @@ func TestParseCanned(t *testing.T) {
}
}
+// checkTrace walks over a good trace and makes a bunch of additional checks
+// that may not cause the parser to outright fail.
+func checkTrace(t *testing.T, ver int, res ParseResult) {
+ for _, ev := range res.Events {
+ if ver >= 1021 {
+ if ev.Type == EvSTWStart && ev.SArgs[0] == "unknown" {
+ t.Errorf("found unknown STW event; update stwReasonStrings?")
+ }
+ }
+ }
+}
+
func TestParseVersion(t *testing.T) {
tests := map[string]int{
"go 1.5 trace\x00\x00\x00\x00": 1005,
diff --git a/src/internal/trace/testdata/http_1_21_good b/src/internal/trace/testdata/http_1_21_good
new file mode 100644
index 0000000000..b3295f9e5d
--- /dev/null
+++ b/src/internal/trace/testdata/http_1_21_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_1_21_good b/src/internal/trace/testdata/stress_1_21_good
new file mode 100644
index 0000000000..1ade5e0eb6
--- /dev/null
+++ b/src/internal/trace/testdata/stress_1_21_good
Binary files differ
diff --git a/src/internal/trace/testdata/stress_start_stop_1_21_good b/src/internal/trace/testdata/stress_start_stop_1_21_good
new file mode 100644
index 0000000000..fff46a9a07
--- /dev/null
+++ b/src/internal/trace/testdata/stress_start_stop_1_21_good
Binary files differ
diff --git a/src/internal/trace/testdata/user_task_region_1_21_good b/src/internal/trace/testdata/user_task_region_1_21_good
new file mode 100644
index 0000000000..5c01a6405d
--- /dev/null
+++ b/src/internal/trace/testdata/user_task_region_1_21_good
Binary files differ