diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/runtime/mgcpacer.go | 9 | ||||
| -rw-r--r-- | src/runtime/proc.go | 79 | ||||
| -rw-r--r-- | src/runtime/proc_test.go | 50 | ||||
| -rw-r--r-- | src/runtime/testdata/testprog/stw_trace.go | 111 |
4 files changed, 236 insertions, 13 deletions
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index f78fb6f636..388cce83cd 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -870,9 +870,12 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) { gcCPULimiter.update(now) } - ok, now := c.assignWaitingGCWorker(pp, now) - if !ok { - return nil, now + // If a worker wasn't already assigned by procresize, assign one now. + if pp.nextGCMarkWorker == nil { + ok, now := c.assignWaitingGCWorker(pp, now) + if !ok { + return nil, now + } } node := pp.nextGCMarkWorker diff --git a/src/runtime/proc.go b/src/runtime/proc.go index a378b8c39d..62e79e74e2 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4135,11 +4135,23 @@ top: gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available + // May be on a new P. + pp = mp.p.ptr() + // findRunnable may have collected an allp snapshot. The snapshot is // only required within findRunnable. Clear it to all GC to collect the // slice. mp.clearAllpSnapshot() + // If the P was assigned a next GC mark worker but findRunnable + // selected anything else, release the worker so another P may run it. + // + // N.B. If this occurs because a higher-priority goroutine was selected + // (trace reader), then tryWakeP is set, which will wake another P to + // run the worker. If this occurs because the GC is no longer active, + // there is no need to wakep. + gcController.releaseNextGCMarkWorker(pp) + if debug.dontfreezetheworld > 0 && freezing.Load() { // See comment in freezetheworld. We don't want to perturb // scheduler state, so we didn't gcstopm in findRunnable, but @@ -6036,8 +6048,10 @@ func procresize(nprocs int32) *p { unlock(&allpLock) } + // Assign Ms to Ps with runnable goroutines. var runnablePs *p var runnablePsNeedM *p + var idlePs *p for i := nprocs - 1; i >= 0; i-- { pp := allp[i] if gp.m.p.ptr() == pp { @@ -6045,7 +6059,8 @@ func procresize(nprocs int32) *p { } pp.status = _Pidle if runqempty(pp) { - pidleput(pp, now) + pp.link.set(idlePs) + idlePs = pp continue } @@ -6071,6 +6086,8 @@ func procresize(nprocs int32) *p { pp.link.set(runnablePs) runnablePs = pp } + // Assign Ms to remaining runnable Ps without usable oldm. See comment + // above. for runnablePsNeedM != nil { pp := runnablePsNeedM runnablePsNeedM = pp.link.ptr() @@ -6081,6 +6098,62 @@ func procresize(nprocs int32) *p { runnablePs = pp } + // Now that we've assigned Ms to Ps with runnable goroutines, assign GC + // mark workers to remaining idle Ps, if needed. + // + // By assigning GC workers to Ps here, we slightly speed up starting + // the world, as we will start enough Ps to run all of the user + // goroutines and GC mark workers all at once, rather than using a + // sequence of wakep calls as each P's findRunnable realizes it needs + // to run a mark worker instead of a user goroutine. + // + // By assigning GC workers to Ps only _after_ previously-running Ps are + // assigned Ms, we ensure that goroutines previously running on a P + // continue to run on the same P, with GC mark workers preferring + // previously-idle Ps. This helps prevent goroutines from shuffling + // around too much across STW. + // + // N.B., if there aren't enough Ps left in idlePs for all of the GC + // mark workers, then findRunnable will still choose to run mark + // workers on Ps assigned above. + // + // N.B., we do this during any STW in the mark phase, not just the + // sweep termination STW that starts the mark phase. gcBgMarkWorker + // always preempts by removing itself from the P, so even unrelated + // STWs during the mark require that Ps reselect mark workers upon + // restart. + if gcBlackenEnabled != 0 { + for idlePs != nil { + pp := idlePs + + ok, _ := gcController.assignWaitingGCWorker(pp, now) + if !ok { + // No more mark workers needed. + break + } + + // Got a worker, P is now runnable. + // + // mget may return nil if there aren't enough Ms, in + // which case startTheWorldWithSema will start one. + // + // N.B. findRunnableGCWorker will make the worker G + // itself runnable. + idlePs = pp.link.ptr() + mp := mget() + pp.m.set(mp) + pp.link.set(runnablePs) + runnablePs = pp + } + } + + // Finally, any remaining Ps are truly idle. + for idlePs != nil { + pp := idlePs + idlePs = pp.link.ptr() + pidleput(pp, now) + } + stealOrder.reset(uint32(nprocs)) var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) @@ -6183,6 +6256,10 @@ func releasepNoTrace() *p { print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n") throw("releasep: invalid p state") } + + // P must clear if nextGCMarkWorker if it stops. + gcController.releaseNextGCMarkWorker(pp) + gp.m.p = 0 pp.m = 0 pp.status = _Pidle diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go index b3084f4895..35a1aeab1f 100644 --- a/src/runtime/proc_test.go +++ b/src/runtime/proc_test.go @@ -1221,7 +1221,7 @@ func TestTraceSTW(t *testing.T) { var errors int for i := range runs { - err := runTestTracesSTW(t, i) + err := runTestTracesSTW(t, i, "TraceSTW", "stop-the-world (read mem stats)") if err != nil { t.Logf("Run %d failed: %v", i, err) errors++ @@ -1235,7 +1235,43 @@ func TestTraceSTW(t *testing.T) { } } -func runTestTracesSTW(t *testing.T, run int) (err error) { +// TestTraceGCSTW verifies that goroutines continue running on the same M and P +// after a GC STW. +func TestTraceGCSTW(t *testing.T) { + // Very similar to TestTraceSTW, but using a STW that starts the GC. + // When the GC starts, the background GC mark workers start running, + // which provide an additional source of disturbance to the scheduler. + // + // procresize assigns GC workers to previously-idle Ps to avoid + // changing what the previously-running Ps are doing. + + if testing.Short() { + t.Skip("skipping in -short mode") + } + + if runtime.NumCPU() < 8 { + t.Skip("This test sets GOMAXPROCS=8 and wants to avoid thread descheduling as much as possible. Skip on machines with less than 8 CPUs") + } + + const runs = 50 + + var errors int + for i := range runs { + err := runTestTracesSTW(t, i, "TraceGCSTW", "stop-the-world (GC sweep termination)") + if err != nil { + t.Logf("Run %d failed: %v", i, err) + errors++ + } + } + + pct := float64(errors)/float64(runs) + t.Logf("Errors: %d/%d = %f%%", errors, runs, 100*pct) + if pct > 0.25 { + t.Errorf("Error rate too high") + } +} + +func runTestTracesSTW(t *testing.T, run int, name, stwType string) (err error) { t.Logf("Run %d", run) // By default, TSAN sleeps for 1s at exit to allow background @@ -1243,7 +1279,7 @@ func runTestTracesSTW(t *testing.T, run int) (err error) { // much, since we are running 50 iterations, so disable the sleep. // // Outside of race mode, GORACE does nothing. - buf := []byte(runTestProg(t, "testprog", "TraceSTW", "GORACE=atexit_sleep_ms=0")) + buf := []byte(runTestProg(t, "testprog", name, "GORACE=atexit_sleep_ms=0")) // We locally "fail" the run (return an error) if the trace exhibits // unwanted scheduling. i.e., the target goroutines did not remain on @@ -1253,7 +1289,7 @@ func runTestTracesSTW(t *testing.T, run int) (err error) { // occur, such as a trace parse error. defer func() { if err != nil || t.Failed() { - testtrace.Dump(t, fmt.Sprintf("TestTraceSTW-run%d", run), []byte(buf), false) + testtrace.Dump(t, fmt.Sprintf("Test%s-run%d", name, run), []byte(buf), false) } }() @@ -1509,12 +1545,10 @@ findEnd: break findEnd case trace.EventRangeBegin: r := ev.Range() - if r.Name == "stop-the-world (read mem stats)" { + if r.Name == stwType { // Note when we see the STW begin. This is not // load bearing; it's purpose is simply to fail - // the test if we manage to remove the STW from - // ReadMemStat, so we remember to change this - // test to add some new source of STW. + // the test if we accidentally remove the STW. stwSeen = true } } diff --git a/src/runtime/testdata/testprog/stw_trace.go b/src/runtime/testdata/testprog/stw_trace.go index 0fed55b875..0fa15da09e 100644 --- a/src/runtime/testdata/testprog/stw_trace.go +++ b/src/runtime/testdata/testprog/stw_trace.go @@ -7,15 +7,18 @@ package main import ( "context" "log" + "math/rand/v2" "os" "runtime" "runtime/debug" + "runtime/metrics" "runtime/trace" "sync/atomic" ) func init() { register("TraceSTW", TraceSTW) + register("TraceGCSTW", TraceGCSTW) } // The parent writes to ping and waits for the children to write back @@ -53,7 +56,7 @@ func TraceSTW() { // https://go.dev/issue/65694). Alternatively, we could just ignore the // trace if the GC runs. runtime.GOMAXPROCS(4) - debug.SetGCPercent(0) + debug.SetGCPercent(-1) if err := trace.Start(os.Stdout); err != nil { log.Fatalf("failed to start tracing: %v", err) @@ -86,6 +89,112 @@ func TraceSTW() { stop.Store(true) } +// Variant of TraceSTW for GC STWs. We want the GC mark workers to start on +// previously-idle Ps, rather than bumping the current P. +func TraceGCSTW() { + ctx := context.Background() + + // The idea here is to have 2 target goroutines that are constantly + // running. When the world restarts after STW, we expect these + // goroutines to continue execution on the same M and P. + // + // Set GOMAXPROCS=8 to make room for the 2 target goroutines, 1 parent, + // 2 dedicated workers, and a bit of slack. + // + // Disable the GC initially so we can be sure it only triggers once we + // are ready. + runtime.GOMAXPROCS(8) + debug.SetGCPercent(-1) + + if err := trace.Start(os.Stdout); err != nil { + log.Fatalf("failed to start tracing: %v", err) + } + defer trace.Stop() + + for i := range 2 { + go traceSTWTarget(i) + } + + // Wait for children to start running. + ping.Store(1) + for pong[0].Load() != 1 {} + for pong[1].Load() != 1 {} + + trace.Log(ctx, "TraceSTW", "start") + + // STW + triggerGC() + + // Make sure to run long enough for the children to schedule again + // after STW. This is included for good measure, but the goroutines + // really ought to have already scheduled since the entire GC + // completed. + ping.Store(2) + for pong[0].Load() != 2 {} + for pong[1].Load() != 2 {} + + trace.Log(ctx, "TraceSTW", "end") + + stop.Store(true) +} + +func triggerGC() { + // Allocate a bunch to trigger the GC rather than using runtime.GC. The + // latter blocks until the GC is complete, which is convenient, but + // messes with scheduling as it gives this P a chance to steal the + // other goroutines before their Ps get up and running again. + + // Bring heap size up prior to enabling the GC to ensure that there is + // a decent amount of work in case the GC triggers immediately upon + // re-enabling. + for range 1000 { + alloc() + } + + sample := make([]metrics.Sample, 1) + sample[0].Name = "/gc/cycles/total:gc-cycles" + metrics.Read(sample) + + start := sample[0].Value.Uint64() + + debug.SetGCPercent(100) + + // Keep allocating until the GC is complete. We really only need to + // continue until the mark workers are scheduled, but there isn't a + // good way to measure that. + for { + metrics.Read(sample) + if sample[0].Value.Uint64() != start { + return + } + + alloc() + } +} + +// Allocate a tree data structure to generate plenty of scan work for the GC. + +type node struct { + children []*node +} + +var gcSink node + +func alloc() { + // 10% chance of adding a node a each layer. + + curr := &gcSink + for { + if len(curr.children) == 0 || rand.Float32() < 0.1 { + curr.children = append(curr.children, new(node)) + return + } + + i := rand.IntN(len(curr.children)) + curr = curr.children[i] + } +} + // Manually insert a morestack call. Leaf functions can omit morestack, but // non-leaf functions should include them. |
