aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/proc.go
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2025-11-17 16:08:21 -0500
committerGopher Robot <gobot@golang.org>2025-11-20 08:10:14 -0800
commita18aff805706bfdaeb9aca042111fae32f9f8b61 (patch)
treed2dbb7b60f0dc01346d7f28444aade79efee655a /src/runtime/proc.go
parent829779f4fe7e002b959a2f4966aa9e21c59e418c (diff)
downloadgo-a18aff805706bfdaeb9aca042111fae32f9f8b61.tar.xz
runtime: select GC mark workers during start-the-world
When the GC starts today, procresize and startTheWorldWithSema don't consider the additional Ps required to run the mark workers. procresize and startTheWorldWithSema resume only the Ps necessary to run the normal user goroutines. Once those Ps start, findRunnable and findRunnableGCWorker determine that a GC worker is necessary and run the worker instead, calling wakep to wake another P to run the original user goroutine. This is unfortunate because it disrupts the intentional placement of Ps on Ms that procresize does. It also has the unfortunate side effect of slightly delaying start-the-world time, as it takes several sequential wakeps to get all Ps started. To address this, procresize explicitly assigns GC mark workers to Ps before starting the world. The assignment occurs _after_ selecting runnable Ps, so that we prefer to select Ps that were previously idle. Note that if fewer than 25% of Ps are idle then we won't be able to assign all dedicated workers, and some of the Ps intended for user goroutines will convert to dedicated workers once they reach findRunnableGCWorker. Also note that stack scanning temporarily suspends the goroutine. Resume occurs through ready, which will move the goroutine to the local runq of the P that did the scan. Thus there is still a source of migration at some point during the GC. For #65694. Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest Change-Id: I6a6a636c51f39f4f4bc716aa87de68f6ebe163a5 Reviewed-on: https://go-review.googlesource.com/c/go/+/721002 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Michael Pratt <mpratt@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/runtime/proc.go')
-rw-r--r--src/runtime/proc.go79
1 files changed, 78 insertions, 1 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index a378b8c39d..62e79e74e2 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4135,11 +4135,23 @@ top:
gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available
+ // May be on a new P.
+ pp = mp.p.ptr()
+
// findRunnable may have collected an allp snapshot. The snapshot is
// only required within findRunnable. Clear it to allow GC to collect the
// slice.
mp.clearAllpSnapshot()
+ // If the P was assigned a next GC mark worker but findRunnable
+ // selected anything else, release the worker so another P may run it.
+ //
+ // N.B. If this occurs because a higher-priority goroutine was selected
+ // (trace reader), then tryWakeP is set, which will wake another P to
+ // run the worker. If this occurs because the GC is no longer active,
+ // there is no need to wakep.
+ gcController.releaseNextGCMarkWorker(pp)
+
if debug.dontfreezetheworld > 0 && freezing.Load() {
// See comment in freezetheworld. We don't want to perturb
// scheduler state, so we didn't gcstopm in findRunnable, but
@@ -6036,8 +6048,10 @@ func procresize(nprocs int32) *p {
unlock(&allpLock)
}
+ // Assign Ms to Ps with runnable goroutines.
var runnablePs *p
var runnablePsNeedM *p
+ var idlePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
if gp.m.p.ptr() == pp {
@@ -6045,7 +6059,8 @@ func procresize(nprocs int32) *p {
}
pp.status = _Pidle
if runqempty(pp) {
- pidleput(pp, now)
+ pp.link.set(idlePs)
+ idlePs = pp
continue
}
@@ -6071,6 +6086,8 @@ func procresize(nprocs int32) *p {
pp.link.set(runnablePs)
runnablePs = pp
}
+ // Assign Ms to remaining runnable Ps without usable oldm. See comment
+ // above.
for runnablePsNeedM != nil {
pp := runnablePsNeedM
runnablePsNeedM = pp.link.ptr()
@@ -6081,6 +6098,62 @@ func procresize(nprocs int32) *p {
runnablePs = pp
}
+ // Now that we've assigned Ms to Ps with runnable goroutines, assign GC
+ // mark workers to remaining idle Ps, if needed.
+ //
+ // By assigning GC workers to Ps here, we slightly speed up starting
+ // the world, as we will start enough Ps to run all of the user
+ // goroutines and GC mark workers all at once, rather than using a
+ // sequence of wakep calls as each P's findRunnable realizes it needs
+ // to run a mark worker instead of a user goroutine.
+ //
+ // By assigning GC workers to Ps only _after_ previously-running Ps are
+ // assigned Ms, we ensure that goroutines previously running on a P
+ // continue to run on the same P, with GC mark workers preferring
+ // previously-idle Ps. This helps prevent goroutines from shuffling
+ // around too much across STW.
+ //
+ // N.B., if there aren't enough Ps left in idlePs for all of the GC
+ // mark workers, then findRunnable will still choose to run mark
+ // workers on Ps assigned above.
+ //
+ // N.B., we do this during any STW in the mark phase, not just the
+ // sweep termination STW that starts the mark phase. gcBgMarkWorker
+ // always preempts by removing itself from the P, so even unrelated
+ // STWs during the mark require that Ps reselect mark workers upon
+ // restart.
+ if gcBlackenEnabled != 0 {
+ for idlePs != nil {
+ pp := idlePs
+
+ ok, _ := gcController.assignWaitingGCWorker(pp, now)
+ if !ok {
+ // No more mark workers needed.
+ break
+ }
+
+ // Got a worker, P is now runnable.
+ //
+ // mget may return nil if there aren't enough Ms, in
+ // which case startTheWorldWithSema will start one.
+ //
+ // N.B. findRunnableGCWorker will make the worker G
+ // itself runnable.
+ idlePs = pp.link.ptr()
+ mp := mget()
+ pp.m.set(mp)
+ pp.link.set(runnablePs)
+ runnablePs = pp
+ }
+ }
+
+ // Finally, any remaining Ps are truly idle.
+ for idlePs != nil {
+ pp := idlePs
+ idlePs = pp.link.ptr()
+ pidleput(pp, now)
+ }
+
stealOrder.reset(uint32(nprocs))
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
@@ -6183,6 +6256,10 @@ func releasepNoTrace() *p {
print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
throw("releasep: invalid p state")
}
+
+ // P must clear its nextGCMarkWorker if it stops.
+ gcController.releaseNextGCMarkWorker(pp)
+
gp.m.p = 0
pp.m = 0
pp.status = _Pidle