Diffstat (limited to 'src/runtime/proc.go')
| -rw-r--r-- | src/runtime/proc.go | 671 |
1 file changed, 412 insertions(+), 259 deletions(-)
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 6c16effc95..081b9a2825 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1654,29 +1654,21 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
 	sched.stopwait = gomaxprocs
 	sched.gcwaiting.Store(true)
 	preemptall()
-	// stop current P
+
+	// Stop current P.
 	gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
 	gp.m.p.ptr().gcStopTime = start
 	sched.stopwait--
-	// try to retake all P's in Psyscall status
-	trace = traceAcquire()
+
+	// Try to retake all P's in syscalls.
 	for _, pp := range allp {
-		s := pp.status
-		if s == _Psyscall && atomic.Cas(&pp.status, s, _Pgcstop) {
-			if trace.ok() {
-				trace.ProcSteal(pp, false)
-			}
-			sched.nGsyscallNoP.Add(1)
-			pp.syscalltick++
-			pp.gcStopTime = nanotime()
-			sched.stopwait--
+		if thread, ok := setBlockOnExitSyscall(pp); ok {
+			thread.gcstopP()
+			thread.resume()
 		}
 	}
-	if trace.ok() {
-		traceRelease(trace)
-	}
-	// stop idle P's
+
+	// Stop idle Ps.
 	now := nanotime()
 	for {
 		pp, _ := pidleget(now)
@@ -1690,7 +1682,7 @@ func stopTheWorldWithSema(reason stwReason) worldStop {
 	wait := sched.stopwait > 0
 	unlock(&sched.lock)
 
-	// wait for remaining P's to stop voluntarily
+	// Wait for remaining Ps to stop voluntarily.
 	if wait {
 		for {
 			// wait for 100us, then try to re-preempt in case of any races
@@ -2156,9 +2148,9 @@ func forEachPInternal(fn func(*p)) {
 	}
 	preemptall()
 
-	// Any P entering _Pidle or _Psyscall from now on will observe
+	// Any P entering _Pidle or a system call from now on will observe
 	// p.runSafePointFn == 1 and will call runSafePointFn when
-	// changing its status to _Pidle/_Psyscall.
+	// changing its status to _Pidle.
 
 	// Run safe point function for all idle Ps. sched.pidle will
 	// not change because we hold sched.lock.
@@ -2175,25 +2167,19 @@ func forEachPInternal(fn func(*p)) {
 	// Run fn for the current P.
 	fn(pp)
 
-	// Force Ps currently in _Psyscall into _Pidle and hand them
+	// Force Ps currently in a system call into _Pidle and hand them
 	// off to induce safe point function execution.
 	for _, p2 := range allp {
-		s := p2.status
-
 		// We need to be fine-grained about tracing here, since handoffp
 		// might call into the tracer, and the tracer is non-reentrant.
-		trace := traceAcquire()
-		if s == _Psyscall && p2.runSafePointFn == 1 && atomic.Cas(&p2.status, s, _Pidle) {
-			if trace.ok() {
-				// It's important that we traceRelease before we call handoffp, which may also traceAcquire.
-				trace.ProcSteal(p2, false)
-				traceRelease(trace)
-			}
-			sched.nGsyscallNoP.Add(1)
-			p2.syscalltick++
+		if atomic.Load(&p2.runSafePointFn) != 1 {
+			// Already ran it.
+			continue
+		}
+		if thread, ok := setBlockOnExitSyscall(p2); ok {
+			thread.takeP()
+			thread.resume()
 			handoffp(p2)
-		} else if trace.ok() {
-			traceRelease(trace)
 		}
 	}
 
@@ -2234,9 +2220,9 @@ func forEachPInternal(fn func(*p)) {
 // }
 //
 // runSafePointFn must be checked on any transition in to _Pidle or
-// _Psyscall to avoid a race where forEachP sees that the P is running
-// just before the P goes into _Pidle/_Psyscall and neither forEachP
-// nor the P run the safe-point function.
+// when entering a system call to avoid a race where forEachP sees
+// that the P is running just before the P goes into _Pidle/system call
+// and neither forEachP nor the P run the safe-point function.
 func runSafePointFn() {
 	p := getg().m.p.ptr()
 	// Resolve the race between forEachP running the safe-point
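The stop-the-world loop above now funnels through setBlockOnExitSyscall instead of CAS-ing _Psyscall directly. The following standalone sketch (hypothetical names, not runtime API) models the pin/take/resume pattern it relies on: CAS a blocking bit into a status word so the target cannot leave its syscall-exit path, take its resource, then clear the bit to let it continue.

// pinresume.go: a minimal, self-contained model of the pin/take/resume
// pattern (hypothetical names; not the runtime's API).
package main

import (
	"fmt"
	"sync/atomic"
)

const (
	statusSyscall uint32 = 1 // analogous to _Gsyscall
	statusPinned  uint32 = 2 // analogous to the _Gscan bit
)

type thread struct {
	status atomic.Uint32
	hasP   atomic.Bool
}

// pin blocks the thread from leaving its syscall, like setBlockOnExitSyscall.
func pin(t *thread) bool {
	return t.status.CompareAndSwap(statusSyscall, statusSyscall|statusPinned)
}

// resume releases the thread, like syscallingThread.resume.
func resume(t *thread) {
	t.status.Store(statusSyscall)
}

func main() {
	t := &thread{}
	t.status.Store(statusSyscall)
	t.hasP.Store(true)

	if pin(t) {
		t.hasP.Store(false) // safely take the P; the thread can't race us
		resume(t)
	}
	fmt.Println("P taken:", !t.hasP.Load())
}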
@@ -2571,7 +2557,7 @@ func oneNewExtraM() {
 // So that the destructor would invoke dropm while the non-Go thread is exiting.
 // This is much faster since it avoids expensive signal-related syscalls.
 //
-// This always runs without a P, so //go:nowritebarrierrec is required.
+// This may run without a P, so //go:nowritebarrierrec is required.
 //
 // This may run with a different stack than was recorded in g0 (there is no
 // call to callbackUpdateSystemStack prior to dropm), so this must be
@@ -4566,7 +4552,6 @@ func save(pc, sp, bp uintptr) {
 //
 //go:nosplit
 func reentersyscall(pc, sp, bp uintptr) {
-	trace := traceAcquire()
 	gp := getg()
 
 	// Disable preemption because during this function g is in Gsyscall status,
@@ -4580,17 +4565,23 @@ func reentersyscall(pc, sp, bp uintptr) {
 	gp.stackguard0 = stackPreempt
 	gp.throwsplit = true
 
+	// Copy the syscalltick over so we can identify if the P got stolen later.
+	gp.m.syscalltick = gp.m.p.ptr().syscalltick
+
+	pp := gp.m.p.ptr()
+	if pp.runSafePointFn != 0 {
+		// runSafePointFn may stack split if run on this stack
+		systemstack(runSafePointFn)
+	}
+	gp.m.oldp.set(pp)
+
 	// Leave SP around for GC and traceback.
 	save(pc, sp, bp)
 	gp.syscallsp = sp
 	gp.syscallpc = pc
 	gp.syscallbp = bp
-	casgstatus(gp, _Grunning, _Gsyscall)
-	if staticLockRanking {
-		// When doing static lock ranking casgstatus can call
-		// systemstack which clobbers g.sched.
-		save(pc, sp, bp)
-	}
+
+	// Double-check sp and bp.
 	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
 		systemstack(func() {
 			print("entersyscall inconsistent sp ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
@@ -4603,40 +4594,45 @@
 			throw("entersyscall")
 		})
 	}
-
+	trace := traceAcquire()
 	if trace.ok() {
+		// Emit a trace event. Notably, actually emitting the event must happen before
+		// the casgstatus because it mutates the P, but the traceLocker must be held
+		// across the casgstatus so the transition is atomic with respect to the event.
 		systemstack(func() {
 			trace.GoSysCall()
-			traceRelease(trace)
 		})
-		// systemstack itself clobbers g.sched.{pc,sp} and we might
-		// need them later when the G is genuinely blocked in a
-		// syscall
+		// systemstack clobbered gp.sched, so restore it.
 		save(pc, sp, bp)
 	}
-
-	if sched.sysmonwait.Load() {
-		systemstack(entersyscall_sysmon)
+	if sched.gcwaiting.Load() {
+		// Optimization: If there's a pending STW, do the equivalent of
+		// entersyscallblock here at the last minute and immediately give
+		// away our P.
+		systemstack(func() {
+			entersyscallHandleGCWait(trace)
+		})
+		// systemstack clobbered gp.sched, so restore it.
 		save(pc, sp, bp)
 	}
-
-	if gp.m.p.ptr().runSafePointFn != 0 {
-		// runSafePointFn may stack split if run on this stack
-		systemstack(runSafePointFn)
+	// As soon as we switch to _Gsyscall, we are in danger of losing our P.
+	// We must not touch it after this point.
+	casgstatus(gp, _Grunning, _Gsyscall)
+	if staticLockRanking {
+		// casgstatus clobbers gp.sched via systemstack under staticLockRanking. Restore it.
		save(pc, sp, bp)
 	}
-
-	gp.m.syscalltick = gp.m.p.ptr().syscalltick
-	pp := gp.m.p.ptr()
-	pp.m = 0
-	gp.m.oldp.set(pp)
-	gp.m.p = 0
-	atomic.Store(&pp.status, _Psyscall)
-	if sched.gcwaiting.Load() {
-		systemstack(entersyscall_gcwait)
+	if trace.ok() {
+		// N.B. We don't need to go on the systemstack because traceRelease is very
+		// carefully recursively nosplit. This also means we don't need to worry
+		// about clobbering gp.sched.
+		traceRelease(trace)
+	}
+	if sched.sysmonwait.Load() {
+		systemstack(entersyscallWakeSysmon)
+		// systemstack clobbered gp.sched, so restore it.
 		save(pc, sp, bp)
 	}
-
 	gp.m.locks--
 }
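reentersyscall now holds the traceLocker across the casgstatus so the GoSysCall event and the _Gsyscall transition appear atomic to the tracer. A minimal model of that invariant, assuming a simple lock-guarded event log standing in for the real tracer:

// traceatomic.go: why the lock must straddle the state change (a sketch,
// not runtime code): if the event and the transition are published under
// one lock, a reader can never observe the new state without the event.
package main

import (
	"fmt"
	"sync"
)

type tracer struct {
	mu     sync.Mutex
	events []string
	state  string
}

// enterSyscall mirrors the ordering in the patch: emit the event, then flip
// the state, all while holding the tracer's lock.
func (t *tracer) enterSyscall() {
	t.mu.Lock() // traceAcquire
	t.events = append(t.events, "GoSysCall")
	t.state = "syscall" // casgstatus(gp, _Grunning, _Gsyscall)
	t.mu.Unlock() // traceRelease
}

// snapshot is what a trace reader does: with the lock held, the event log
// and the state can never disagree.
func (t *tracer) snapshot() (string, int) {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.state, len(t.events)
}

func main() {
	tr := &tracer{state: "running"}
	tr.enterSyscall()
	state, n := tr.snapshot()
	fmt.Println(state, n) // syscall 1
}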
@@ -4663,7 +4659,7 @@ func entersyscall() {
 	reentersyscall(sys.GetCallerPC(), sys.GetCallerSP(), fp)
 }
 
-func entersyscall_sysmon() {
+func entersyscallWakeSysmon() {
 	lock(&sched.lock)
 	if sched.sysmonwait.Load() {
 		sched.sysmonwait.Store(false)
@@ -4672,25 +4668,19 @@
 	unlock(&sched.lock)
 }
 
-func entersyscall_gcwait() {
+func entersyscallHandleGCWait(trace traceLocker) {
 	gp := getg()
-	pp := gp.m.oldp.ptr()
 
 	lock(&sched.lock)
-	trace := traceAcquire()
-	if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
+	if sched.stopwait > 0 {
+		// Set our P to _Pgcstop so the STW can take it.
+		pp := gp.m.p.ptr()
+		pp.m = 0
+		gp.m.p = 0
+		atomic.Store(&pp.status, _Pgcstop)
+
 		if trace.ok() {
-			// This is a steal in the new tracer. While it's very likely
-			// that we were the ones to put this P into _Psyscall, between
-			// then and now it's totally possible it had been stolen and
-			// then put back into _Psyscall for us to acquire here. In such
-			// case ProcStop would be incorrect.
-			//
-			// TODO(mknyszek): Consider emitting a ProcStop instead when
-			// gp.m.syscalltick == pp.syscalltick, since then we know we never
-			// lost the P.
-			trace.ProcSteal(pp, true)
-			traceRelease(trace)
+			trace.ProcStop(pp)
 		}
 		sched.nGsyscallNoP.Add(1)
 		pp.gcStopTime = nanotime()
@@ -4698,8 +4688,6 @@
 		if sched.stopwait--; sched.stopwait == 0 {
 			notewakeup(&sched.stopnote)
 		}
-	} else if trace.ok() {
-		traceRelease(trace)
 	}
 	unlock(&sched.lock)
 }
@@ -4744,6 +4732,22 @@ func entersyscallblock() {
 			throw("entersyscallblock")
 		})
 	}
+
+	// Once we switch to _Gsyscall, we can't safely touch
+	// our P anymore, so we need to hand it off beforehand.
+	// The tracer also needs to see the syscall before the P
+	// handoff, so the order here must be (1) trace,
+	// (2) handoff, (3) _Gsyscall switch.
+	trace := traceAcquire()
+	systemstack(func() {
+		if trace.ok() {
+			trace.GoSysCall()
+		}
+		handoffp(releasep())
+	})
+	// <--
+	// Caution: we're in a small window where we are in _Grunning without a P.
+	// -->
 	casgstatus(gp, _Grunning, _Gsyscall)
 	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
 		systemstack(func() {
@@ -4757,8 +4761,11 @@ func entersyscallblock() {
 			throw("entersyscallblock")
 		})
 	}
-
-	systemstack(entersyscallblock_handoff)
+	if trace.ok() {
+		systemstack(func() {
+			traceRelease(trace)
+		})
+	}
 
 	// Resave for traceback during blocked call.
 	save(sys.GetCallerPC(), sys.GetCallerSP(), getcallerfp())
@@ -4766,15 +4773,6 @@ func entersyscallblock() {
 	gp.m.locks--
 }
 
-func entersyscallblock_handoff() {
-	trace := traceAcquire()
-	if trace.ok() {
-		trace.GoSysCall()
-		traceRelease(trace)
-	}
-	handoffp(releasep())
-}
-
 // The goroutine g exited its system call.
 // Arrange for it to run on a cpu again.
 // This is called only from the go syscall library, not
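entersyscallblock's new comment spells out the required order: trace, then handoff, then the _Gsyscall switch, tolerating a brief window of running without a P. A toy, single-threaded sketch of that ordering (hypothetical names; the real handoffp and trace machinery are far more involved):

// blockorder.go: hand the resource off while still "running", and only then
// flip the state, so no observer ever sees a blocked owner holding a P.
package main

import "fmt"

type proc struct{ id int }

var idle []*proc // a stand-in for the scheduler's idle P list

func handoff(p *proc) { idle = append(idle, p) }

func enterSyscallBlock(state *string, p **proc) {
	// (1) the trace point would go here.
	handoff(*p) // (2) hand off the P while still "running".
	*p = nil
	// Caution: running without a P in this window.
	*state = "syscall" // (3) only now flip the state.
}

func main() {
	state := "running"
	p := &proc{id: 0}
	enterSyscallBlock(&state, &p)
	fmt.Println(state, len(idle)) // syscall 1
}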
@@ -4802,13 +4800,87 @@ func exitsyscall() {
 	if sys.GetCallerSP() > gp.syscallsp {
 		throw("exitsyscall: syscall frame is no longer valid")
 	}
-	gp.waitsince = 0
+
+	if sched.stopwait == freezeStopWait {
+		// Wedge ourselves if there's an outstanding freezetheworld.
+		// If we transition to running, we might end up with our traceback
+		// being taken twice.
+		systemstack(func() {
+			lock(&deadlock)
+			lock(&deadlock)
+		})
+	}
+
+	// Optimistically assume we're going to keep running, and switch to running.
+	// Before this point, our P wiring is not ours. Once we get past this point,
+	// we can access our P if we have it, otherwise we lost it.
+	//
+	// N.B. Because we're transitioning to _Grunning here, traceAcquire doesn't
+	// need to be held ahead of time. We're effectively atomic with respect to
+	// the tracer because we're non-preemptible and in the runtime. It can't stop
+	// us to read a bad status.
+	casgstatus(gp, _Gsyscall, _Grunning)
+
+	// Caution: we're in a window where we may be in _Grunning without a P.
+	// Either we will grab a P or call exitsyscall0, where we'll switch to
+	// _Grunnable.
+
+	// Grab and clear our old P.
 	oldp := gp.m.oldp.ptr()
-	gp.m.oldp = 0
-	if exitsyscallfast(oldp) {
-		// When exitsyscallfast returns success, we have a P so can now use
-		// write barriers
+	gp.m.oldp.set(nil)
+
+	// Check if we still have a P, and if not, try to acquire an idle P.
+	pp := gp.m.p.ptr()
+	if pp != nil {
+		// Fast path: we still have our P. Just emit a syscall exit event.
+		if trace := traceAcquire(); trace.ok() {
+			systemstack(func() {
+				// The truth is we truly never lost the P, but syscalltick
+				// is used to indicate whether the P should be treated as
+				// lost anyway. For example, when syscalltick is trashed by
+				// dropm.
+				//
+				// TODO(mknyszek): Consider a more explicit mechanism for this.
+				// Then syscalltick doesn't need to be trashed, and can be used
+				// exclusively by sysmon for deciding when it's time to retake.
+				if pp.syscalltick == gp.m.syscalltick {
+					trace.GoSysExit(false)
+				} else {
+					// Since we need to pretend we lost the P, but nobody ever
+					// took it, we need a ProcSteal event to model the loss.
+					// Then, continue with everything else we'd do if we lost
+					// the P.
+					trace.ProcSteal(pp)
+					trace.ProcStart()
+					trace.GoSysExit(true)
+					trace.GoStart()
+				}
+				traceRelease(trace)
+			})
+		}
+	} else {
+		// Slow path: we lost our P. Try to get another one.
+		systemstack(func() {
+			// Try to get some other P.
+			if pp := exitsyscallTryGetP(oldp); pp != nil {
+				// Install the P.
+				acquirepNoTrace(pp)
+
+				// We're going to start running again, so emit all the relevant events.
+				if trace := traceAcquire(); trace.ok() {
+					trace.ProcStart()
+					trace.GoSysExit(true)
+					trace.GoStart()
+					traceRelease(trace)
+				}
+			}
+		})
+		pp = gp.m.p.ptr()
+	}
+
+	// If we have a P, clean up and exit.
+	if pp != nil {
 		if goroutineProfile.active {
 			// Make sure that gp has had its stack written out to the goroutine
 			// profile, exactly as it was when the goroutine profiler first
@@ -4817,41 +4889,19 @@ func exitsyscall() {
 				tryRecordGoroutineProfileWB(gp)
 			})
 		}
-		trace := traceAcquire()
-		if trace.ok() {
-			lostP := oldp != gp.m.p.ptr() || gp.m.syscalltick != gp.m.p.ptr().syscalltick
-			systemstack(func() {
-				// Write out syscall exit eagerly.
-				//
-				// It's important that we write this *after* we know whether we
-				// lost our P or not (determined by exitsyscallfast).
-				trace.GoSysExit(lostP)
-				if lostP {
-					// We lost the P at some point, even though we got it back here.
-					// Trace that we're starting again, because there was a tracev2.GoSysBlock
-					// call somewhere in exitsyscallfast (indicating that this goroutine
-					// had blocked) and we're about to start running again.
-					trace.GoStart()
-				}
-			})
-		}
-		// There's a cpu for us, so we can run.
-		gp.m.p.ptr().syscalltick++
-		// We need to cas the status and scan before resuming...
-		casgstatus(gp, _Gsyscall, _Grunning)
-		if trace.ok() {
-			traceRelease(trace)
-		}
+
+		// Increment the syscalltick for P, since we're exiting a syscall.
+		pp.syscalltick++
 
 		// Garbage collector isn't running (since we are),
 		// so okay to clear syscallsp.
 		gp.syscallsp = 0
 		gp.m.locks--
 		if gp.preempt {
-			// restore the preemption request in case we've cleared it in newstack
+			// Restore the preemption request in case we cleared it in newstack.
 			gp.stackguard0 = stackPreempt
 		} else {
-			// otherwise restore the real stackGuard, we've spoiled it in entersyscall/entersyscallblock
+			// Otherwise restore the real stackGuard, we clobbered it in entersyscall/entersyscallblock.
 			gp.stackguard0 = gp.stack.lo + stackGuard
 		}
 		gp.throwsplit = false
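The fast path above keys off syscalltick, a per-P generation counter: if the value snapshotted in entersyscall still matches at exit, the P was never taken while we were away. A self-contained sketch of the generation-counter idea (not runtime code):

// gentick.go: detect "taken and maybe returned while I wasn't looking"
// with a generation counter, the way exitsyscall uses syscalltick.
package main

import "fmt"

type resource struct{ tick uint32 }

func steal(r *resource) { r.tick++ } // whoever takes the resource bumps the tick

func main() {
	r := &resource{}
	snapshot := r.tick // entersyscall: remember the generation

	steal(r) // sysmon retakes the P while we're in the "syscall"

	if r.tick == snapshot {
		fmt.Println("never lost the resource")
	} else {
		fmt.Println("lost the resource; must model the loss") // ProcSteal analog
	}
}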
@@ -4860,14 +4910,13 @@ func exitsyscall() {
 			// Scheduling of this goroutine is disabled.
 			Gosched()
 		}
-
 		return
 	}
-
+	// Slowest path: We couldn't get a P, so call into the scheduler.
 	gp.m.locks--
 
 	// Call the scheduler.
-	mcall(exitsyscall0)
+	mcall(exitsyscallNoP)
 
 	// Scheduler returned, so we're allowed to run now.
 	// Delete the syscallsp information that we left for
@@ -4880,78 +4929,38 @@ func exitsyscall() {
 	gp.throwsplit = false
 }
 
-//go:nosplit
-func exitsyscallfast(oldp *p) bool {
-	// Freezetheworld sets stopwait but does not retake P's.
-	if sched.stopwait == freezeStopWait {
-		return false
-	}
-
-	// Try to re-acquire the last P.
-	trace := traceAcquire()
-	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
-		// There's a cpu for us, so we can run.
-		wirep(oldp)
-		exitsyscallfast_reacquired(trace)
-		if trace.ok() {
-			traceRelease(trace)
+// exitsyscall's attempt to get any P, if it's missing one.
+// Returns the P it acquired on success, or nil.
+//
+// Must execute on the systemstack because exitsyscall is nosplit.
+//
+//go:systemstack
+func exitsyscallTryGetP(oldp *p) *p {
+	// Try to steal our old P back.
+	if oldp != nil {
+		if thread, ok := setBlockOnExitSyscall(oldp); ok {
+			thread.takeP()
+			thread.resume()
+			sched.nGsyscallNoP.Add(-1) // takeP adds 1.
+			return oldp
 		}
-		return true
-	}
-	if trace.ok() {
-		traceRelease(trace)
 	}
 
-	// Try to get any other idle P.
+	// Try to get an idle P.
 	if sched.pidle != 0 {
-		var ok bool
-		systemstack(func() {
-			ok = exitsyscallfast_pidle()
-		})
-		if ok {
-			return true
+		lock(&sched.lock)
+		pp, _ := pidleget(0)
+		if pp != nil && sched.sysmonwait.Load() {
+			sched.sysmonwait.Store(false)
+			notewakeup(&sched.sysmonnote)
 		}
-	}
-	return false
-}
-
-// exitsyscallfast_reacquired is the exitsyscall path on which this G
-// has successfully reacquired the P it was running on before the
-// syscall.
-//
-//go:nosplit
-func exitsyscallfast_reacquired(trace traceLocker) {
-	gp := getg()
-	if gp.m.syscalltick != gp.m.p.ptr().syscalltick {
-		if trace.ok() {
-			// The p was retaken and then enter into syscall again (since gp.m.syscalltick has changed).
-			// tracev2.GoSysBlock for this syscall was already emitted,
-			// but here we effectively retake the p from the new syscall running on the same p.
-			systemstack(func() {
-				// We're stealing the P. It's treated
-				// as if it temporarily stopped running. Then, start running.
-				trace.ProcSteal(gp.m.p.ptr(), true)
-				trace.ProcStart()
-			})
+		unlock(&sched.lock)
+		if pp != nil {
+			sched.nGsyscallNoP.Add(-1)
+			return pp
 		}
-		gp.m.p.ptr().syscalltick++
-	}
-}
-
-func exitsyscallfast_pidle() bool {
-	lock(&sched.lock)
-	pp, _ := pidleget(0)
-	if pp != nil && sched.sysmonwait.Load() {
-		sched.sysmonwait.Store(false)
-		notewakeup(&sched.sysmonnote)
 	}
-	unlock(&sched.lock)
-	if pp != nil {
-		sched.nGsyscallNoP.Add(-1)
-		acquirep(pp)
-		return true
-	}
-	return false
+	return nil
 }
 
 // exitsyscall slow path on g0.
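exitsyscallTryGetP tries two sources in order: pin and steal back the P it entered the syscall with, then fall back to the idle list, and otherwise returns nil so the caller parks in the scheduler. A simplified, single-threaded sketch of that fallback chain (hypothetical names; the real code must also pin the old P's thread first):

// trygetp.go: the steal-back / idle-pool / give-up fallback chain.
package main

import "fmt"

type proc struct{ id int }

var idle = []*proc{{id: 9}}

func tryGetP(oldp *proc, canSteal bool) *proc {
	if oldp != nil && canSteal {
		return oldp // fast: reclaim the P we entered the syscall with
	}
	if len(idle) > 0 {
		pp := idle[len(idle)-1] // slow: take any idle P
		idle = idle[:len(idle)-1]
		return pp
	}
	return nil // slowest: caller must park in the scheduler
}

func main() {
	fmt.Println(tryGetP(&proc{id: 1}, true).id) // 1
	fmt.Println(tryGetP(nil, false).id)         // 9
	fmt.Println(tryGetP(nil, false))            // <nil>
}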
@@ -4960,11 +4969,10 @@ func exitsyscallfast_pidle() bool {
 // Called via mcall, so gp is the calling g from this M.
 //
 //go:nowritebarrierrec
-func exitsyscall0(gp *g) {
-	var trace traceLocker
+func exitsyscallNoP(gp *g) {
 	traceExitingSyscall()
-	trace = traceAcquire()
-	casgstatus(gp, _Gsyscall, _Grunnable)
+	trace := traceAcquire()
+	casgstatus(gp, _Grunning, _Grunnable)
 	traceExitedSyscall()
 	if trace.ok() {
 		// Write out syscall exit eagerly.
@@ -6021,6 +6029,21 @@ func procresize(nprocs int32) *p {
 //
 //go:yeswritebarrierrec
 func acquirep(pp *p) {
+	// Do the work.
+	acquirepNoTrace(pp)
+
+	// Emit the event.
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.ProcStart()
+		traceRelease(trace)
+	}
+}
+
+// Internals of acquirep, just skipping the trace events.
+//
+//go:yeswritebarrierrec
+func acquirepNoTrace(pp *p) {
 	// Do the part that isn't allowed to have write barriers.
 	wirep(pp)
 
@@ -6029,12 +6052,6 @@ func acquirep(pp *p) {
 	// Perform deferred mcache flush before this P can allocate
 	// from a potentially stale mcache.
 	pp.mcache.prepareForSweep()
-
-	trace := traceAcquire()
-	if trace.ok() {
-		trace.ProcStart()
-		traceRelease(trace)
-	}
 }
 
 // wirep is the first step of acquirep, which actually associates the
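acquirep is now a thin wrapper over acquirepNoTrace, separating the work from the trace event so callers like exitsyscall can emit their own events in the right order. A sketch of that wrapper/internal split (hypothetical names):

// splitapi.go: the do-work / do-work-plus-event split.
package main

import "fmt"

func acquireNoTrace(id int) { fmt.Println("wired P", id) } // the work

func acquire(id int) {
	acquireNoTrace(id)           // do the work
	fmt.Println("ProcStart", id) // then emit the event
}

func main() {
	acquire(1)        // normal callers get the event for free
	acquireNoTrace(2) // exitsyscall-style callers emit events themselves
}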
@@ -6382,73 +6399,205 @@ func retake(now int64) uint32 {
 	// temporarily drop the allpLock. Hence, we need to re-fetch
 	// allp each time around the loop.
 	for i := 0; i < len(allp); i++ {
+		// Quickly filter out non-running Ps. Running Ps are either
+		// in a syscall or are actually executing. Idle Ps don't
+		// need to be retaken.
+		//
+		// This is best-effort, so it's OK that it's racy. Our target
+		// is to retake Ps that have been running or in a syscall for
+		// a long time (milliseconds), so the state has plenty of time
+		// to stabilize.
 		pp := allp[i]
-		if pp == nil {
-			// This can happen if procresize has grown
+		if pp == nil || atomic.Load(&pp.status) != _Prunning {
+			// pp can be nil if procresize has grown
 			// allp but not yet created new Ps.
 			continue
 		}
 		pd := &pp.sysmontick
-		s := pp.status
 		sysretake := false
-		if s == _Prunning || s == _Psyscall {
-			// Preempt G if it's running on the same schedtick for
-			// too long. This could be from a single long-running
-			// goroutine or a sequence of goroutines run via
-			// runnext, which share a single schedtick time slice.
-			t := int64(pp.schedtick)
-			if int64(pd.schedtick) != t {
-				pd.schedtick = uint32(t)
-				pd.schedwhen = now
-			} else if pd.schedwhen+forcePreemptNS <= now {
-				preemptone(pp)
-				// In case of syscall, preemptone() doesn't
-				// work, because there is no M wired to P.
-				sysretake = true
-			}
+
+		// Preempt G if it's running on the same schedtick for
+		// too long. This could be from a single long-running
+		// goroutine or a sequence of goroutines run via
+		// runnext, which share a single schedtick time slice.
+		schedt := int64(pp.schedtick)
+		if int64(pd.schedtick) != schedt {
+			pd.schedtick = uint32(schedt)
+			pd.schedwhen = now
+		} else if pd.schedwhen+forcePreemptNS <= now {
+			preemptone(pp)
+			// If pp is in a syscall, preemptone doesn't work.
+			// Neither the goroutine nor the thread can respond to a
+			// preemption request because they're not in Go code,
+			// so we need to take the P ourselves.
+			sysretake = true
 		}
-		if s == _Psyscall {
-			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
-			t := int64(pp.syscalltick)
-			if !sysretake && int64(pd.syscalltick) != t {
-				pd.syscalltick = uint32(t)
-				pd.syscallwhen = now
-				continue
-			}
-			// On the one hand we don't want to retake Ps if there is no other work to do,
-			// but on the other hand we want to retake them eventually
-			// because they can prevent the sysmon thread from deep sleep.
-			if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
-				continue
-			}
-			// Drop allpLock so we can take sched.lock.
-			unlock(&allpLock)
-			// Need to decrement number of idle locked M's
-			// (pretending that one more is running) before the CAS.
-			// Otherwise the M from which we retake can exit the syscall,
-			// increment nmidle and report deadlock.
-			incidlelocked(-1)
-			trace := traceAcquire()
-			if atomic.Cas(&pp.status, s, _Pidle) {
-				if trace.ok() {
-					trace.ProcSteal(pp, false)
-					traceRelease(trace)
-				}
-				sched.nGsyscallNoP.Add(1)
-				n++
-				pp.syscalltick++
-				handoffp(pp)
-			} else if trace.ok() {
-				traceRelease(trace)
-			}
-			incidlelocked(1)
-			lock(&allpLock)
+
+		// Drop allpLock so we can take sched.lock.
+		unlock(&allpLock)
+
+		// Need to decrement number of idle locked M's (pretending that
+		// one more is running) before we take the P and resume.
+		// Otherwise the M from which we retake can exit the syscall,
+		// increment nmidle and report deadlock.
+		//
+		// Can't call incidlelocked once we setBlockOnExitSyscall, due
+		// to a lock ordering violation between sched.lock and _Gscan.
+		incidlelocked(-1)
+
+		// Try to prevent the P from continuing in the syscall, if it's in one at all.
+		thread, ok := setBlockOnExitSyscall(pp)
+		if !ok {
+			// Not in a syscall, or something changed out from under us.
+			goto done
 		}
+
+		// Retake the P if it's there for more than 1 sysmon tick (at least 20us).
+		if syst := int64(pp.syscalltick); !sysretake && int64(pd.syscalltick) != syst {
+			pd.syscalltick = uint32(syst)
+			pd.syscallwhen = now
+			thread.resume()
+			goto done
+		}
+
+		// On the one hand we don't want to retake Ps if there is no other work to do,
+		// but on the other hand we want to retake them eventually
+		// because they can prevent the sysmon thread from deep sleep.
+		if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
+			thread.resume()
+			goto done
+		}
+
+		// Take the P. Note: because we have the scan bit, the goroutine
+		// is at worst stuck spinning in exitsyscall.
+		thread.takeP()
+		thread.resume()
+		n++
+
+		// Handoff the P for some other thread to run it.
+		handoffp(pp)
+
+		// The P has been handed off to another thread, so risk of a false
+		// deadlock report while we hold onto it is gone.
+	done:
+		incidlelocked(1)
+		lock(&allpLock)
 	}
 	unlock(&allpLock)
 	return uint32(n)
 }
 
+// syscallingThread represents a thread in a system call that temporarily
+// cannot advance out of the system call.
+type syscallingThread struct {
+	gp     *g
+	mp     *m
+	pp     *p
+	status uint32
+}
+
+// setBlockOnExitSyscall prevents pp's thread from advancing out of
+// exitsyscall. On success, returns the g/m/p state of the thread
+// and true. At that point, the caller owns the g/m/p links referenced,
+// the goroutine is in _Gsyscall, and prevented from transitioning out
+// of it. On failure, it returns false, and none of these guarantees are
+// made.
+//
+// Callers must call resume on the resulting thread state once
+// they're done with the thread, otherwise it will remain blocked forever.
+//
+// This function races with state changes on pp, and thus may fail
+// if pp is not in a system call, or exits a system call concurrently
+// with this function. However, this function is safe to call without
+// any additional synchronization.
+func setBlockOnExitSyscall(pp *p) (syscallingThread, bool) {
+	if pp.status != _Prunning {
+		return syscallingThread{}, false
+	}
+	// Be very careful here, these reads are intentionally racy.
+	// Once we notice the G is in _Gsyscall, acquire its scan bit,
+	// and validate that it's still connected to the *same* M and P,
+	// we can actually get to work. Holding the scan bit will prevent
+	// the G from exiting the syscall.
+	//
+	// Our goal here is to interrupt long syscalls. If it turns out
+	// that we're wrong and the G switched to another syscall while
+	// we were trying to do this, that's completely fine. It's
+	// probably making more frequent syscalls and the typical
+	// preemption paths should be effective.
+	mp := pp.m.ptr()
+	if mp == nil {
+		// Nothing to do.
+		return syscallingThread{}, false
+	}
+	gp := mp.curg
+	if gp == nil {
+		// Nothing to do.
+		return syscallingThread{}, false
+	}
+	status := readgstatus(gp) &^ _Gscan
+
+	// A goroutine is considered in a syscall, and may have a corresponding
+	// P, if it's in _Gsyscall *or* _Gdeadextra. In the latter case, it's an
+	// extra M goroutine.
+	if status != _Gsyscall && status != _Gdeadextra {
+		// Not in a syscall, nothing to do.
+		return syscallingThread{}, false
+	}
+	if !castogscanstatus(gp, status, status|_Gscan) {
+		// Not in _Gsyscall or _Gdeadextra anymore. Nothing to do.
+		return syscallingThread{}, false
+	}
+	if gp.m != mp || gp.m.p.ptr() != pp {
+		// This is not what we originally observed. Nothing to do.
+		casfrom_Gscanstatus(gp, status|_Gscan, status)
+		return syscallingThread{}, false
+	}
+	return syscallingThread{gp, mp, pp, status}, true
+}
+
+// gcstopP unwires the P attached to the syscalling thread
+// and moves it into the _Pgcstop state.
+//
+// The caller must be stopping the world.
+func (s syscallingThread) gcstopP() {
+	assertLockHeld(&sched.lock)
+
+	s.releaseP(_Pgcstop)
+	s.pp.gcStopTime = nanotime()
+	sched.stopwait--
+}
+
+// takeP unwires the P attached to the syscalling thread
+// and moves it into the _Pidle state.
+func (s syscallingThread) takeP() {
+	s.releaseP(_Pidle)
+}
+
+// releaseP unwires the P from the syscalling thread, moving
+// it to the provided state. Callers should prefer to use
+// takeP and gcstopP.
+func (s syscallingThread) releaseP(state uint32) {
+	if state != _Pidle && state != _Pgcstop {
+		throw("attempted to release P into a bad state")
+	}
+	trace := traceAcquire()
+	s.pp.m = 0
+	s.mp.p = 0
+	atomic.Store(&s.pp.status, state)
+	if trace.ok() {
+		trace.ProcSteal(s.pp)
+		traceRelease(trace)
+	}
+	sched.nGsyscallNoP.Add(1)
+	s.pp.syscalltick++
+}
+
+// resume allows a syscalling thread to advance beyond exitsyscall.
+func (s syscallingThread) resume() {
+	casfrom_Gscanstatus(s.gp, s.status|_Gscan, s.status)
+}
+
 // Tell all goroutines that they have been preempted and they should stop.
 // This function is purely best-effort. It can fail to inform a goroutine if a
 // processor just started running it.
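setBlockOnExitSyscall is an optimistic lock: racy pre-reads, a CAS on the scan bit, then re-validation of the g/m/p linkage, backing off on any mismatch. A runnable model of that validate-after-lock protocol (hypothetical names and bit values, not the runtime's):

// validateafterlock.go: read racily, grab the blocking bit, then re-check
// that the linkage didn't change while we were acquiring it; back off if so.
package main

import (
	"fmt"
	"sync/atomic"
)

const (
	inSyscall uint32 = 1
	blocked   uint32 = 2 // analog of the _Gscan bit
)

type g struct {
	status atomic.Uint32
	owner  atomic.Int64 // which "P" this g is wired to
}

func blockOnExit(gp *g, want int64) bool {
	if gp.status.Load()&inSyscall == 0 {
		return false // racy pre-check: not in a syscall
	}
	if !gp.status.CompareAndSwap(inSyscall, inSyscall|blocked) {
		return false // it moved; nothing to do
	}
	if gp.owner.Load() != want {
		gp.status.Store(inSyscall) // not what we observed: undo and fail
		return false
	}
	return true // caller now owns the linkage until it resumes gp
}

func main() {
	gp := &g{}
	gp.status.Store(inSyscall)
	gp.owner.Store(7)

	if blockOnExit(gp, 7) {
		fmt.Println("pinned; safe to take the P")
		gp.status.Store(inSyscall) // resume
	}
}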
@@ -6486,6 +6635,10 @@ func preemptone(pp *p) bool {
 	if gp == nil || gp == mp.g0 {
 		return false
 	}
+	if readgstatus(gp)&^_Gscan == _Gsyscall {
+		// Don't bother trying to preempt a goroutine in a syscall.
+		return false
+	}
 	gp.preempt = true
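preemptone's new early-out compares the goroutine's status with the scan bit masked off, so a syscalling goroutine is skipped whether or not it is currently pinned. A tiny sketch of the mask trick (hypothetical status values):

// scanmask.go: clear the scan bit before comparing, so "in a syscall" and
// "in a syscall while pinned" are treated the same.
package main

import "fmt"

const (
	gSyscall uint32 = 0x3
	gScan    uint32 = 0x1000
)

func shouldSkipPreempt(status uint32) bool {
	return status&^gScan == gSyscall
}

func main() {
	fmt.Println(shouldSkipPreempt(gSyscall))        // true
	fmt.Println(shouldSkipPreempt(gSyscall | gScan)) // true: scan bit ignored
	fmt.Println(shouldSkipPreempt(0x2))             // false: some other state
}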
