aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/testdata
diff options
context:
space:
mode:
author Michael Pratt <mpratt@google.com> 2025-10-24 15:14:59 -0400
committer Gopher Robot <gobot@golang.org> 2025-11-13 07:44:41 -0800
commit 4ebf295b0b1740caac6302cc824ebd0f6175c1d5 (patch)
tree ccc6d602bcf37d7a40d144323ee340d14e830ebd /src/runtime/testdata
parent 625d8e9b9cd7ede188a8856c5ac88791333baa63 (diff)
download go-4ebf295b0b1740caac6302cc824ebd0f6175c1d5.tar.xz
runtime: prefer to restart Ps on the same M after STW
Today, Ps jump around arbitrarily across STW. Instead, try to keep the P on the previous M it ran on. In the future, we'll likely want to try to expand this beyond STW to create a more general affinity for specific Ms. For this to be useful, the Ps need to have runnable Gs. Today, STW preemption goes through goschedImpl, which places the G on the global run queue. If that was the only G then the P won't have runnable goroutines anymore. It makes more sense to keep the G with its P across STW anyway, so add a special case to goschedImpl for that. On my machine, this CL reduces the error rate in TestTraceSTW from 99.8% to 1.9%. As a nearly 2% error rate shows, there are still cases where this best effort scheduling doesn't work. The most obvious is that while procresize assigns Ps back to their original M, startTheWorldWithSema calls wakep to start a spinning M. The spinning M may steal a goroutine from another P if that P is too slow to start. For #65694. Change-Id: I6a6a636c0969c587d039b68bc68ea16c74ff1fc9 Reviewed-on: https://go-review.googlesource.com/c/go/+/714801 Reviewed-by: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Pratt <mpratt@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/testdata')
-rw-r--r-- src/runtime/testdata/testprog/stw_mexit.go 69
-rw-r--r-- src/runtime/testdata/testprog/stw_trace.go 99
2 files changed, 168 insertions, 0 deletions
diff --git a/src/runtime/testdata/testprog/stw_mexit.go b/src/runtime/testdata/testprog/stw_mexit.go
new file mode 100644
index 0000000000..b022ef4777
--- /dev/null
+++ b/src/runtime/testdata/testprog/stw_mexit.go
@@ -0,0 +1,69 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "runtime"
+)
+
+func init() { // Runs during package initialization, before main.
+ register("mexitSTW", mexitSTW) // register is defined outside this diff; presumably makes mexitSTW selectable by the name "mexitSTW" (confirm).
+}
+
+// mexitSTW is a stress test for pp.oldm pointing to an exited M.
+//
+// If pp.oldm points to an exited M it should be ignored and another M used
+// instead. To stress:
+//
+// 1. Start and exit many threads (thus setting oldm on some P).
+// 2. Meanwhile, frequently stop the world.
+//
+// If procresize incorrectly attempts to assign a P to an exited M, likely
+// failure modes are:
+//
+// 1. Crash in startTheWorldWithSema attempting to access the M, if it is nil.
+//
+// 2. Memory corruption elsewhere after startTheWorldWithSema writes to the M,
+// if it is not nil, but is freed and reused for another allocation.
+//
+// 3. Hang on a subsequent stop the world waiting for the P to stop, if the M
+// object is valid, but the M is exited, because startTheWorldWithSema didn't
+// actually wake anything to run the P. The P is _Pidle, but not in the pidle
+// list, thus the next stop the world will wait for it to actively stop.
+//
+// For this to go wrong, an exited M must fail to clear mp.self and must leave
+// the M on the sched.midle list.
+//
+// Similar to TraceSTW.
+func mexitSTW() {
+ // Ensure we have multiple Ps, but not too many, as we want the
+ // runnable goroutines likely to run on Ps with oldm set.
+ runtime.GOMAXPROCS(4)
+
+ // Background busy work so there is always something runnable.
+ for i := range 2 {
+ go traceSTWTarget(i)
+ }
+
+ // Wait for children to start running.
+ ping.Store(1) // traceSTWTarget copies the current ping value into pong[i].
+ for pong[0].Load() != 1 {}
+ for pong[1].Load() != 1 {}
+
+ for range 100 { // 100 rounds of "exit a thread, then stop the world".
+ // Exit a thread. The last P to run this will have it in oldm.
+ go func() {
+ runtime.LockOSThread() // The goroutine returns while still locked, which terminates its thread.
+ }()
+
+ // STW: ReadMemStats stops the world while it gathers statistics.
+ var ms runtime.MemStats
+ runtime.ReadMemStats(&ms)
+ }
+
+ stop.Store(true) // Lets the traceSTWTarget loops exit.
+
+ println("OK") // Reached only if nothing above crashed or hung; presumably the marker the test driver checks for (confirm).
+}
diff --git a/src/runtime/testdata/testprog/stw_trace.go b/src/runtime/testdata/testprog/stw_trace.go
new file mode 100644
index 0000000000..0fed55b875
--- /dev/null
+++ b/src/runtime/testdata/testprog/stw_trace.go
@@ -0,0 +1,99 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "context"
+ "log"
+ "os"
+ "runtime"
+ "runtime/debug"
+ "runtime/trace"
+ "sync/atomic"
+)
+
+func init() { // Runs during package initialization, before main.
+ register("TraceSTW", TraceSTW) // register is defined outside this diff; presumably makes TraceSTW selectable by the name "TraceSTW" (confirm).
+}
+
+// The parent writes to ping and waits for the children to write the same
+// value back via pong (one slot per child) to show that they are running.
+var ping atomic.Uint32
+var pong [2]atomic.Uint32
+
+// Tell runners to stop. Set by the parent when it is done; polled by traceSTWTarget.
+var stop atomic.Bool
+
+func traceSTWTarget(i int) { // Busy loop run by target goroutine i (i indexes pong) until stop is set.
+ for !stop.Load() {
+ // Async preemption often takes 100ms+ to preempt this loop on
+ // windows-386. This makes the test flaky, as the traceReadCPU
+ // timer often fires by the time STW finishes, jumbling the
+ // goroutine scheduling. As a workaround, ensure we have a
+ // morestack call for prompt preemption.
+ ensureMorestack()
+ // Echo the latest ping value into this target's pong slot so the parent can see it is running.
+ pong[i].Store(ping.Load())
+ }
+}
+
+func TraceSTW() { // TraceSTW writes an execution trace spanning one stop-the-world to stdout.
+ ctx := context.Background()
+
+ // The idea here is to have 2 target goroutines that are constantly
+ // running. When the world restarts after STW, we expect these
+ // goroutines to continue execution on the same M and P.
+ //
+ // Set GOMAXPROCS=4 to make room for the 2 target goroutines, 1 parent,
+ // and 1 slack for potential mis-scheduling.
+ //
+ // Disable the GC (a negative percentage turns it off) because GC STW
+ // generally moves goroutines (see https://go.dev/issue/65694).
+ // Alternatively, we could just ignore the trace if the GC runs.
+ runtime.GOMAXPROCS(4)
+ debug.SetGCPercent(-1)
+
+ if err := trace.Start(os.Stdout); err != nil {
+ log.Fatalf("failed to start tracing: %v", err)
+ }
+ defer trace.Stop()
+
+ for i := range 2 {
+ go traceSTWTarget(i)
+ }
+
+ // Wait for children to start running.
+ ping.Store(1) // traceSTWTarget copies the current ping value into pong[i].
+ for pong[0].Load() != 1 {}
+ for pong[1].Load() != 1 {}
+
+ trace.Log(ctx, "TraceSTW", "start") // Marks the start of the region of interest in the trace.
+
+ // STW: ReadMemStats stops the world while it gathers statistics.
+ var ms runtime.MemStats
+ runtime.ReadMemStats(&ms)
+
+ // Make sure to run long enough for the children to schedule again
+ // after STW.
+ ping.Store(2)
+ for pong[0].Load() != 2 {}
+ for pong[1].Load() != 2 {}
+
+ trace.Log(ctx, "TraceSTW", "end") // Marks the end of the region of interest in the trace.
+
+ stop.Store(true) // Lets the traceSTWTarget loops exit.
+}
+
+// Manually insert a morestack call. Leaf functions can omit the morestack
+// check, but non-leaf functions should include it, so this one is non-leaf.
+
+//go:noinline
+func ensureMorestack() {
+ ensureMorestack1() // This call is what makes ensureMorestack a non-leaf function.
+}
+
+//go:noinline
+func ensureMorestack1() { // Intentionally empty: exists only as a non-inlined callee of ensureMorestack.
+}