diff options
| author | Michael Pratt <mpratt@google.com> | 2025-10-24 15:14:59 -0400 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-11-13 07:44:41 -0800 |
| commit | 4ebf295b0b1740caac6302cc824ebd0f6175c1d5 (patch) | |
| tree | ccc6d602bcf37d7a40d144323ee340d14e830ebd /src/runtime/proc.go | |
| parent | 625d8e9b9cd7ede188a8856c5ac88791333baa63 (diff) | |
| download | go-4ebf295b0b1740caac6302cc824ebd0f6175c1d5.tar.xz | |
runtime: prefer to restart Ps on the same M after STW
Today, Ps jump around arbitrarily across STW. Instead, try to keep the P
on the previous M it ran on. In the future, we'll likely want to try to
expand this beyond STW to create a more general affinity for specific
Ms.
For this to be useful, the Ps need to have runnable Gs. Today, STW
preemption goes through goschedImpl, which places the G on the global
run queue. If that was the only G, then the P won't have runnable
goroutines anymore.
It makes more sense to keep the G with its P across STW anyway, so add a
special case to goschedImpl for that.
On my machine, this CL reduces the error rate in TestTraceSTW from 99.8%
to 1.9%.
As a nearly 2% error rate shows, there are still cases where this best
effort scheduling doesn't work. The most obvious is that while
procresize assigns Ps back to their original M, startTheWorldWithSema
calls wakep to start a spinning M. The spinning M may steal a goroutine
from another P if that P is too slow to start.
For #65694.
Change-Id: I6a6a636c0969c587d039b68bc68ea16c74ff1fc9
Reviewed-on: https://go-review.googlesource.com/c/go/+/714801
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/proc.go')
| -rw-r--r-- | src/runtime/proc.go | 83 |
1 file changed, 76 insertions, 7 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 30d2a68626..44b64913a5 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1009,6 +1009,8 @@ func mcommoninit(mp *m, id int64) { mp.id = mReserveID() } + mp.self = newMWeakPointer(mp) + mrandinit(mp) mpreinit(mp) @@ -2018,6 +2020,10 @@ func mexit(osStack bool) { // Free vgetrandom state. vgetrandomDestroy(mp) + // Clear the self pointer so Ps don't access this M after it is freed, + // or keep it alive. + mp.self.clear() + // Remove m from allm. lock(&sched.lock) for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { @@ -4259,6 +4265,7 @@ func park_m(gp *g) { } func goschedImpl(gp *g, preempted bool) { + pp := gp.m.p.ptr() trace := traceAcquire() status := readgstatus(gp) if status&^_Gscan != _Grunning { @@ -4281,9 +4288,15 @@ func goschedImpl(gp *g, preempted bool) { } dropg() - lock(&sched.lock) - globrunqput(gp) - unlock(&sched.lock) + if preempted && sched.gcwaiting.Load() { + // If preempted for STW, keep the G on the local P in runnext + // so it can keep running immediately after the STW. + runqput(pp, gp, true) + } else { + lock(&sched.lock) + globrunqput(gp) + unlock(&sched.lock) + } if mainStarted { wakep() @@ -6013,6 +6026,7 @@ func procresize(nprocs int32) *p { } var runnablePs *p + var runnablePsNeedM *p for i := nprocs - 1; i >= 0; i-- { pp := allp[i] if gp.m.p.ptr() == pp { @@ -6021,12 +6035,41 @@ func procresize(nprocs int32) *p { pp.status = _Pidle if runqempty(pp) { pidleput(pp, now) - } else { - pp.m.set(mget()) - pp.link.set(runnablePs) - runnablePs = pp + continue + } + + // Prefer to run on the most recent M if it is + // available. + // + // Ps with no oldm (or for which oldm is already taken + // by an earlier P), we delay until all oldm Ps are + // handled. Otherwise, mget may return an M that a + // later P has in oldm. + var mp *m + if oldm := pp.oldm.get(); oldm != nil { + // Returns nil if oldm is not idle. 
+ mp = mgetSpecific(oldm) + } + if mp == nil { + // Call mget later. + pp.link.set(runnablePsNeedM) + runnablePsNeedM = pp + continue } + pp.m.set(mp) + pp.link.set(runnablePs) + runnablePs = pp } + for runnablePsNeedM != nil { + pp := runnablePsNeedM + runnablePsNeedM = pp.link.ptr() + + mp := mget() + pp.m.set(mp) + pp.link.set(runnablePs) + runnablePs = pp + } + stealOrder.reset(uint32(nprocs)) var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32 atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs)) @@ -6064,6 +6107,11 @@ func acquirepNoTrace(pp *p) { // Have p; write barriers now allowed. + // The M we're associating with will be the old M after the next + // releasep. We must set this here because write barriers are not + // allowed in releasep. + pp.oldm = pp.m.ptr().self + // Perform deferred mcache flush before this P can allocate // from a potentially stale mcache. pp.mcache.prepareForSweep() @@ -6998,6 +7046,27 @@ func mget() *m { return mp } +// Try to get a specific m from midle list. Returns nil if it isn't on the +// midle list. +// +// sched.lock must be held. +// May run during STW, so write barriers are not allowed. +// +//go:nowritebarrierrec +func mgetSpecific(mp *m) *m { + assertLockHeld(&sched.lock) + + if mp.idleNode.prev == 0 && mp.idleNode.next == 0 { + // Not on the list. + return nil + } + + sched.midle.remove(unsafe.Pointer(mp)) + sched.nmidle-- + + return mp +} + // Put gp on the global runnable queue. // sched.lock must be held. // May run during STW, so write barriers are not allowed. |
