aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/proc.go
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2025-11-20 14:40:43 -0500
committerCherry Mui <cherryyz@google.com>2025-11-20 14:40:43 -0500
commite3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch)
tree5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/runtime/proc.go
parent95b4ad525fc8d70c881960ab9f75f31548023bed (diff)
parentca37d24e0b9369b8086959df5bc230b38bf98636 (diff)
downloadgo-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz
[dev.simd] all: merge master (ca37d24) into dev.simd
Conflicts: - src/cmd/compile/internal/typecheck/builtin.go Merge List: + 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn + 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode + 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration + 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns + 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append + 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world + 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two + 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix + 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64 + 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions + 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces + 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface + 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements + 2025-11-19 33529db142 spec: escape double-ampersands + 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations + 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove + 2025-11-19 2239520d1c test: go fmt prove.go tests + 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation + 2025-11-18 8c41a482f9 runtime: add dlog.hexdump + 2025-11-18 e912618bd2 runtime: add hexdumper + 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers" + 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values + 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches + 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress + 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value + 
2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64 + 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64 + 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521 + 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64 + 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more + 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points + 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable + 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r + 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls + 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024 + 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms + 2025-11-17 bc15963813 cmd/compile: clean up prove pass + 2025-11-17 1297fae708 go/token: add (*File).End method + 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline + 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear + 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption + 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall + 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c + 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects + 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256 + 2025-11-14 aea881230d std: fix printf("%q", int) mistakes + 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc + 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work + 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default + 2025-11-14 1a03d0db3f runtime: skip tests for 
GOEXPERIMENT=arenas that do not handle clobberfree=1 + 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers + 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces + 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation + 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark + 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation + 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows + 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison + 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler + 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule + 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication + 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek + 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference + 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules + 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx + 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid + 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off + 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always + 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken + 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct + 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes + 2025-11-13 77c5130100 go/types: minor simplification + 2025-11-13 7601cd3880 go/types: generate cycles.go + 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Diffstat (limited to 'src/runtime/proc.go')
-rw-r--r--src/runtime/proc.go155
1 files changed, 128 insertions, 27 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 21b276cabf..58fb4bd681 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3120,7 +3120,7 @@ func startm(pp *p, spinning, lockheld bool) {
//go:nowritebarrierrec
func handoffp(pp *p) {
// handoffp must start an M in any situation where
- // findrunnable would return a G to run on pp.
+ // findRunnable would return a G to run on pp.
// if it has local work, start it straight away
if !runqempty(pp) || !sched.runq.empty() {
@@ -3363,7 +3363,7 @@ func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
mp := getg().m
// The conditions here and in handoffp must agree: if
- // findrunnable would return a G to run, handoffp must start
+ // findRunnable would return a G to run, handoffp must start
// an M.
top:
@@ -3587,7 +3587,7 @@ top:
goto top
}
if releasep() != pp {
- throw("findrunnable: wrong p")
+ throw("findRunnable: wrong p")
}
now = pidleput(pp, now)
unlock(&sched.lock)
@@ -3632,7 +3632,7 @@ top:
if mp.spinning {
mp.spinning = false
if sched.nmspinning.Add(-1) < 0 {
- throw("findrunnable: negative nmspinning")
+ throw("findRunnable: negative nmspinning")
}
// Note that for correctness, only the last M transitioning from
@@ -3705,10 +3705,10 @@ top:
if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
sched.pollUntil.Store(pollUntil)
if mp.p != 0 {
- throw("findrunnable: netpoll with p")
+ throw("findRunnable: netpoll with p")
}
if mp.spinning {
- throw("findrunnable: netpoll with spinning")
+ throw("findRunnable: netpoll with spinning")
}
delay := int64(-1)
if pollUntil != 0 {
@@ -3974,7 +3974,7 @@ func checkIdleGCNoP() (*p, *g) {
// timers and the network poller if there isn't one already.
func wakeNetPoller(when int64) {
if sched.lastpoll.Load() == 0 {
- // In findrunnable we ensure that when polling the pollUntil
+ // In findRunnable we ensure that when polling the pollUntil
// field is either zero or the time to which the current
// poll is expected to run. This can have a spurious wakeup
// but should never miss a wakeup.
@@ -3999,7 +3999,7 @@ func resetspinning() {
gp.m.spinning = false
nmspinning := sched.nmspinning.Add(-1)
if nmspinning < 0 {
- throw("findrunnable: negative nmspinning")
+ throw("findRunnable: negative nmspinning")
}
// M wakeup policy is deliberately somewhat conservative, so check if we
// need to wakeup another P here. See "Worker thread parking/unparking"
@@ -4136,11 +4136,23 @@ top:
gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available
+ // May be on a new P.
+ pp = mp.p.ptr()
+
// findRunnable may have collected an allp snapshot. The snapshot is
// only required within findRunnable. Clear it to allow the GC to collect the
// slice.
mp.clearAllpSnapshot()
+ // If the P was assigned a next GC mark worker but findRunnable
+ // selected anything else, release the worker so another P may run it.
+ //
+ // N.B. If this occurs because a higher-priority goroutine was selected
+ // (trace reader), then tryWakeP is set, which will wake another P to
+ // run the worker. If this occurs because the GC is no longer active,
+ // there is no need to wakep.
+ gcController.releaseNextGCMarkWorker(pp)
+
if debug.dontfreezetheworld > 0 && freezing.Load() {
// See comment in freezetheworld. We don't want to perturb
// scheduler state, so we didn't gcstopm in findRunnable, but
@@ -4659,6 +4671,11 @@ func reentersyscall(pc, sp, bp uintptr) {
gp.m.locks--
}
+// debugExtendGrunningNoP is a debug mode that extends the windows in which
+// we're _Grunning without a P in order to try to shake out bugs with code
+// assuming this state is impossible.
+const debugExtendGrunningNoP = false
+
// Standard syscall entry used by the go syscall library and normal cgo calls.
//
// This is exported via linkname to assembly in the syscall package and x/sys.
@@ -4771,6 +4788,9 @@ func entersyscallblock() {
// <--
// Caution: we're in a small window where we are in _Grunning without a P.
// -->
+ if debugExtendGrunningNoP {
+ usleep(10)
+ }
casgstatus(gp, _Grunning, _Gsyscall)
if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
systemstack(func() {
@@ -4853,6 +4873,9 @@ func exitsyscall() {
// Caution: we're in a window where we may be in _Grunning without a P.
// Either we will grab a P or call exitsyscall0, where we'll switch to
// _Grunnable.
+ if debugExtendGrunningNoP {
+ usleep(10)
+ }
// Grab and clear our old P.
oldp := gp.m.oldp.ptr()
@@ -6026,8 +6049,10 @@ func procresize(nprocs int32) *p {
unlock(&allpLock)
}
+ // Assign Ms to Ps with runnable goroutines.
var runnablePs *p
var runnablePsNeedM *p
+ var idlePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
if gp.m.p.ptr() == pp {
@@ -6035,7 +6060,8 @@ func procresize(nprocs int32) *p {
}
pp.status = _Pidle
if runqempty(pp) {
- pidleput(pp, now)
+ pp.link.set(idlePs)
+ idlePs = pp
continue
}
@@ -6061,6 +6087,8 @@ func procresize(nprocs int32) *p {
pp.link.set(runnablePs)
runnablePs = pp
}
+ // Assign Ms to remaining runnable Ps without usable oldm. See comment
+ // above.
for runnablePsNeedM != nil {
pp := runnablePsNeedM
runnablePsNeedM = pp.link.ptr()
@@ -6071,6 +6099,62 @@ func procresize(nprocs int32) *p {
runnablePs = pp
}
+ // Now that we've assigned Ms to Ps with runnable goroutines, assign GC
+ // mark workers to remaining idle Ps, if needed.
+ //
+ // By assigning GC workers to Ps here, we slightly speed up starting
+ // the world, as we will start enough Ps to run all of the user
+ // goroutines and GC mark workers all at once, rather than using a
+ // sequence of wakep calls as each P's findRunnable realizes it needs
+ // to run a mark worker instead of a user goroutine.
+ //
+ // By assigning GC workers to Ps only _after_ previously-running Ps are
+ // assigned Ms, we ensure that goroutines previously running on a P
+ // continue to run on the same P, with GC mark workers preferring
+ // previously-idle Ps. This helps prevent goroutines from shuffling
+ // around too much across STW.
+ //
+ // N.B., if there aren't enough Ps left in idlePs for all of the GC
+ // mark workers, then findRunnable will still choose to run mark
+ // workers on Ps assigned above.
+ //
+ // N.B., we do this during any STW in the mark phase, not just the
+ // sweep termination STW that starts the mark phase. gcBgMarkWorker
+ // always preempts by removing itself from the P, so even unrelated
+ // STWs during the mark require that Ps reselect mark workers upon
+ // restart.
+ if gcBlackenEnabled != 0 {
+ for idlePs != nil {
+ pp := idlePs
+
+ ok, _ := gcController.assignWaitingGCWorker(pp, now)
+ if !ok {
+ // No more mark workers needed.
+ break
+ }
+
+ // Got a worker, P is now runnable.
+ //
+ // mget may return nil if there aren't enough Ms, in
+ // which case startTheWorldWithSema will start one.
+ //
+ // N.B. findRunnableGCWorker will make the worker G
+ // itself runnable.
+ idlePs = pp.link.ptr()
+ mp := mget()
+ pp.m.set(mp)
+ pp.link.set(runnablePs)
+ runnablePs = pp
+ }
+ }
+
+ // Finally, any remaining Ps are truly idle.
+ for idlePs != nil {
+ pp := idlePs
+ idlePs = pp.link.ptr()
+ pidleput(pp, now)
+ }
+
stealOrder.reset(uint32(nprocs))
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
@@ -6173,6 +6257,10 @@ func releasepNoTrace() *p {
print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
throw("releasep: invalid p state")
}
+
+	// P must clear its nextGCMarkWorker if it stops.
+ gcController.releaseNextGCMarkWorker(pp)
+
gp.m.p = 0
pp.m = 0
pp.status = _Pidle
@@ -7259,7 +7347,7 @@ func pidlegetSpinning(now int64) (*p, int64) {
pp, now := pidleget(now)
if pp == nil {
- // See "Delicate dance" comment in findrunnable. We found work
+ // See "Delicate dance" comment in findRunnable. We found work
// that we cannot take, we must synchronize with non-spinning
// Ms that may be preparing to drop their P.
sched.needspinning.Store(1)
@@ -7497,23 +7585,36 @@ func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool)
// Try to steal from pp.runnext.
if next := pp.runnext; next != 0 {
if pp.status == _Prunning {
- // Sleep to ensure that pp isn't about to run the g
- // we are about to steal.
- // The important use case here is when the g running
- // on pp ready()s another g and then almost
- // immediately blocks. Instead of stealing runnext
- // in this window, back off to give pp a chance to
- // schedule runnext. This will avoid thrashing gs
- // between different Ps.
- // A sync chan send/recv takes ~50ns as of time of
- // writing, so 3us gives ~50x overshoot.
- if !osHasLowResTimer {
- usleep(3)
- } else {
- // On some platforms system timer granularity is
- // 1-15ms, which is way too much for this
- // optimization. So just yield.
- osyield()
+ if mp := pp.m.ptr(); mp != nil {
+ if gp := mp.curg; gp == nil || readgstatus(gp)&^_Gscan != _Gsyscall {
+ // Sleep to ensure that pp isn't about to run the g
+ // we are about to steal.
+ // The important use case here is when the g running
+ // on pp ready()s another g and then almost
+ // immediately blocks. Instead of stealing runnext
+ // in this window, back off to give pp a chance to
+ // schedule runnext. This will avoid thrashing gs
+ // between different Ps.
+ // A sync chan send/recv takes ~50ns as of time of
+ // writing, so 3us gives ~50x overshoot.
+ // If curg is nil, we assume that the P is likely
+ // to be in the scheduler. If curg isn't nil and isn't
+ // in a syscall, then it's either running, waiting, or
+ // runnable. In this case we want to sleep because the
+ // P might either call into the scheduler soon (running),
+ // or already is (since we found a waiting or runnable
+ // goroutine hanging off of a running P, suggesting it
+ // either recently transitioned out of running, or will
+ // transition to running shortly).
+ if !osHasLowResTimer {
+ usleep(3)
+ } else {
+ // On some platforms system timer granularity is
+ // 1-15ms, which is way too much for this
+ // optimization. So just yield.
+ osyield()
+ }
+ }
}
}
if !pp.runnext.cas(next, 0) {