diff options
| author | Katie Hockman <katie@golang.org> | 2020-12-14 10:03:05 -0500 |
|---|---|---|
| committer | Katie Hockman <katie@golang.org> | 2020-12-14 10:06:13 -0500 |
| commit | 0345ede87ee12698988973884cfc0fd3d499dffd (patch) | |
| tree | 7123cff141ee5661208d2f5f437b8f5252ac7f6a /src/runtime/mgc.go | |
| parent | 4651d6b267818b0e0d128a5443289717c4bb8cbc (diff) | |
| parent | 0a02371b0576964e81c3b40d328db9a3ef3b031b (diff) | |
| download | go-0345ede87ee12698988973884cfc0fd3d499dffd.tar.xz | |
[dev.fuzz] all: merge master into dev.fuzz
Change-Id: I5d8c8329ccc9d747bd81ade6b1cb7cb8ae2e94b2
Diffstat (limited to 'src/runtime/mgc.go')
| -rw-r--r-- | src/runtime/mgc.go | 474 |
1 files changed, 251 insertions, 223 deletions
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index bd87144355..185d3201ca 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -290,10 +290,14 @@ func setGCPhase(x uint32) { type gcMarkWorkerMode int const ( + // gcMarkWorkerNotWorker indicates that the next scheduled G is not + // starting work and the mode should be ignored. + gcMarkWorkerNotWorker gcMarkWorkerMode = iota + // gcMarkWorkerDedicatedMode indicates that the P of a mark // worker is dedicated to running that mark worker. The mark // worker should run without preemption. - gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota + gcMarkWorkerDedicatedMode // gcMarkWorkerFractionalMode indicates that a P is currently // running the "fractional" mark worker. The fractional worker @@ -313,6 +317,7 @@ const ( // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes // to use in execution traces. var gcMarkWorkerModeStrings = [...]string{ + "Not worker", "GC (dedicated)", "GC (fractional)", "GC (idle)", @@ -388,10 +393,24 @@ type gcControllerState struct { // bytes that should be performed by mutator assists. This is // computed at the beginning of each cycle and updated every // time heap_scan is updated. - assistWorkPerByte float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + assistWorkPerByte uint64 // assistBytesPerWork is 1/assistWorkPerByte. - assistBytesPerWork float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + // + // Note that because this is read and written independently + // from assistWorkPerByte users may notice a skew between + // the two values, and such a state should be safe. 
+ assistBytesPerWork uint64 // fractionalUtilizationGoal is the fraction of wall clock // time that should be spent in the fractional mark worker on @@ -409,7 +428,8 @@ type gcControllerState struct { } // startCycle resets the GC controller's state and computes estimates -// for a new GC cycle. The caller must hold worldsema. +// for a new GC cycle. The caller must hold worldsema and the world +// must be stopped. func (c *gcControllerState) startCycle() { c.scanWork = 0 c.bgScanCredit = 0 @@ -469,7 +489,8 @@ func (c *gcControllerState) startCycle() { c.revise() if debug.gcpacertrace > 0 { - print("pacer: assist ratio=", c.assistWorkPerByte, + assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte)) + print("pacer: assist ratio=", assistRatio, " (scan ", memstats.heap_scan>>20, " MB in ", work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", @@ -479,9 +500,22 @@ func (c *gcControllerState) startCycle() { } // revise updates the assist ratio during the GC cycle to account for -// improved estimates. This should be called either under STW or -// whenever memstats.heap_scan, memstats.heap_live, or -// memstats.next_gc is updated (with mheap_.lock held). +// improved estimates. This should be called whenever memstats.heap_scan, +// memstats.heap_live, or memstats.next_gc is updated. It is safe to +// call concurrently, but it may race with other calls to revise. +// +// The result of this race is that the two assist ratio values may not line +// up or may be stale. In practice this is OK because the assist ratio +// moves slowly throughout a GC cycle, and the assist ratio is a best-effort +// heuristic anyway. Furthermore, no part of the heuristic depends on +// the two assist ratio values being exact reciprocals of one another, since +// the two values are used to convert values from different sources. 
+// +// The worst case result of this raciness is that we may miss a larger shift +// in the ratio (say, if we decide to pace more aggressively against the +// hard heap goal) but even this "hard goal" is best-effort (see #40460). +// The dedicated GC should ensure we don't exceed the hard goal by too much +// in the rare case we do exceed it. // // It should only be called when gcBlackenEnabled != 0 (because this // is when assists are enabled and the necessary statistics are @@ -494,10 +528,12 @@ func (c *gcControllerState) revise() { gcpercent = 100000 } live := atomic.Load64(&memstats.heap_live) + scan := atomic.Load64(&memstats.heap_scan) + work := atomic.Loadint64(&c.scanWork) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. - heapGoal := int64(memstats.next_gc) + heapGoal := int64(atomic.Load64(&memstats.next_gc)) // Compute the expected scan work remaining. // @@ -508,17 +544,17 @@ func (c *gcControllerState) revise() { // // (This is a float calculation to avoid overflowing on // 100*heap_scan.) - scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || c.scanWork > scanWorkExpected { + if int64(live) > heapGoal || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. const maxOvershoot = 1.1 - heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + heapGoal = int64(float64(heapGoal) * maxOvershoot) // Compute the upper bound on the scan work remaining. - scanWorkExpected = int64(memstats.heap_scan) + scanWorkExpected = int64(scan) } // Compute the remaining scan work estimate. 
@@ -528,7 +564,7 @@ func (c *gcControllerState) revise() { // (scanWork), so allocation will change this difference // slowly in the soft regime and not at all in the hard // regime. - scanWorkRemaining := scanWorkExpected - c.scanWork + scanWorkRemaining := scanWorkExpected - work if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, @@ -552,8 +588,15 @@ func (c *gcControllerState) revise() { // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) - c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) + // Note that the assist ratio values are updated atomically + // but not together. This means there may be some degree of + // skew between the two values. This is generally OK as the + // values shift relatively slowly over the course of a GC + // cycle. + assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining) + assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining) + atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte)) + atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork)) } // endCycle computes the trigger ratio for the next cycle. @@ -670,18 +713,12 @@ func (c *gcControllerState) enlistWorker() { } } -// findRunnableGCWorker returns the background mark worker for _p_ if it +// findRunnableGCWorker returns a background mark worker for _p_ if it // should be run. This must only be called when gcBlackenEnabled != 0. func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { if gcBlackenEnabled == 0 { throw("gcControllerState.findRunnable: blackening not enabled") } - if _p_.gcBgMarkWorker == 0 { - // The mark worker associated with this P is blocked - // performing a mark transition. 
We can't run it - // because it may be on some other run or wait queue. - return nil - } if !gcMarkWorkAvailable(_p_) { // No work to be done right now. This can happen at @@ -691,15 +728,35 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return nil } + // Grab a worker before we commit to running below. + node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node == nil { + // There is at least one worker per P, so normally there are + // enough workers to run on all Ps, if necessary. However, once + // a worker enters gcMarkDone it may park without rejoining the + // pool, thus freeing a P with no corresponding worker. + // gcMarkDone never depends on another worker doing work, so it + // is safe to simply do nothing here. + // + // If gcMarkDone bails out without completing the mark phase, + // it will always do so with queued global work. Thus, that P + // will be immediately eligible to re-run the worker G it was + // just using, ensuring work can complete. + return nil + } + decIfPositive := func(ptr *int64) bool { - if *ptr > 0 { - if atomic.Xaddint64(ptr, -1) >= 0 { + for { + v := atomic.Loadint64(ptr) + if v <= 0 { + return false + } + + // TODO: having atomic.Casint64 would be more pleasant. + if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) { return true } - // We lost a race - atomic.Xaddint64(ptr, +1) } - return false } if decIfPositive(&c.dedicatedMarkWorkersNeeded) { @@ -708,6 +765,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode } else if c.fractionalUtilizationGoal == 0 { // No need for fractional workers. 
+ gcBgMarkWorkerPool.push(&node.node) return nil } else { // Is this P behind on the fractional utilization @@ -717,14 +775,15 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { delta := nanotime() - gcController.markStartTime if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { // Nope. No need to run a fractional worker. + gcBgMarkWorkerPool.push(&node.node) return nil } // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } - // Run the background mark worker - gp := _p_.gcBgMarkWorker.ptr() + // Run the background mark worker. + gp := node.gp.ptr() casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) @@ -762,6 +821,8 @@ func pollFractionalWorkerExit() bool { // // mheap_.lock must be held or the world must be stopped. func gcSetTriggerRatio(triggerRatio float64) { + assertWorldStoppedOrLockHeld(&mheap_.lock) + // Compute the next GC goal, which is when the allocated heap // has grown by GOGC/100 over the heap marked by the last // cycle. @@ -844,7 +905,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // Commit to the trigger and goal. memstats.gc_trigger = trigger - memstats.next_gc = goal + atomic.Store64(&memstats.next_gc, goal) if trace.enabled { traceNextGC() } @@ -901,7 +962,9 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { - egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked) + assertWorldStoppedOrLockHeld(&mheap_.lock) + + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. egogc = 0 @@ -983,7 +1046,6 @@ var work struct { nproc uint32 tstart int64 nwait uint32 - ndone uint32 // Number of roots of various root types. Set by gcMarkRootPrepare. 
nFlushCacheRoots int @@ -1381,6 +1443,7 @@ func gcStart(trigger gcTrigger) { now = startTheWorldWithSema(trace.enabled) work.pauseNS += now - work.pauseStart work.tMark = now + memstats.gcPauseDist.record(now - work.pauseStart) }) // Release the world sema before Gosched() in STW mode @@ -1407,19 +1470,6 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 -// debugCachedWork enables extra checks for debugging premature mark -// termination. -// -// For debugging issue #27993. -const debugCachedWork = false - -// gcWorkPauseGen is for debugging the mark completion algorithm. -// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. -// Only used if debugCachedWork is true. -// -// For debugging issue #27993. -var gcWorkPauseGen uint32 = 1 - // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1475,15 +1525,7 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) - // For debugging, shrink the write barrier - // buffer so it flushes immediately. - // wbBuf.reset will keep it at this size as - // long as throwOnGCWork is set. - if debugCachedWork { - b := &_p_.wbBuf - b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) - b.debugGen = gcWorkPauseGen - } + // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1494,29 +1536,12 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false - } else if debugCachedWork { - // For debugging, freeze the gcWork - // until we know whether we've reached - // completion or not. If we think - // we've reached completion, but - // there's a paused gcWork, then - // that's a bug. - _p_.gcw.pauseGen = gcWorkPauseGen - // Capture the G's stack. 
- for i := range _p_.gcw.pauseStack { - _p_.gcw.pauseStack[i] = 0 - } - callers(1, _p_.gcw.pauseStack[:]) } }) casgstatus(gp, _Gwaiting, _Grunning) }) if gcMarkDoneFlushed != 0 { - if debugCachedWork { - // Release paused gcWorks. - atomic.Xadd(&gcWorkPauseGen, 1) - } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1526,13 +1551,6 @@ top: goto top } - if debugCachedWork { - throwOnGCWork = true - // Release paused gcWorks. If there are any, they - // should now observe throwOnGCWork and panic. - atomic.Xadd(&gcWorkPauseGen, 1) - } - // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be @@ -1549,59 +1567,34 @@ top: // below. The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. - if debugCachedWork { - // For debugging, double check that no work was added after we - // went around above and disable write barrier buffering. + // There is sometimes work left over when we enter mark termination due + // to write barriers performed after the completion barrier above. + // Detect this and resume concurrent mark. This is obviously + // unfortunate. + // + // See issue #27993 for details. + // + // Switch to the system stack to call wbBufFlush1, though in this case + // it doesn't matter because we're non-preemptible anyway. 
+ restart := false + systemstack(func() { for _, p := range allp { - gcw := &p.gcw - if !gcw.empty() { - printlock() - print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) - if gcw.wbuf1 == nil { - print(" wbuf1=<nil>") - } else { - print(" wbuf1.n=", gcw.wbuf1.nobj) - } - if gcw.wbuf2 == nil { - print(" wbuf2=<nil>") - } else { - print(" wbuf2.n=", gcw.wbuf2.nobj) - } - print("\n") - if gcw.pauseGen == gcw.putGen { - println("runtime: checkPut already failed at this generation") - } - throw("throwOnGCWork") + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break } } - } else { - // For unknown reasons (see issue #27993), there is - // sometimes work left over when we enter mark - // termination. Detect this and resume concurrent - // mark. This is obviously unfortunate. - // - // Switch to the system stack to call wbBufFlush1, - // though in this case it doesn't matter because we're - // non-preemptible anyway. - restart := false + }) + if restart { + getg().m.preemptoff = "" systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } + now := startTheWorldWithSema(true) + work.pauseNS += now - work.pauseStart + memstats.gcPauseDist.record(now - work.pauseStart) }) - if restart { - getg().m.preemptoff = "" - systemstack(func() { - now := startTheWorldWithSema(true) - work.pauseNS += now - work.pauseStart - }) - semrelease(&worldsema) - goto top - } + semrelease(&worldsema) + goto top } // Disable assists and background workers. We must do @@ -1630,10 +1623,10 @@ top: gcMarkTermination(nextTriggerRatio) } +// World must be stopped and mark assists and background workers must be +// disabled. func gcMarkTermination(nextTriggerRatio float64) { - // World is stopped. - // Start marktermination which includes enabling the write barrier. - atomic.Store(&gcBlackenEnabled, 0) + // Start marktermination (write barrier remains enabled for now). 
setGCPhase(_GCmarktermination) work.heap1 = memstats.heap_live @@ -1711,6 +1704,7 @@ func gcMarkTermination(nextTriggerRatio float64) { unixNow := sec*1e9 + int64(nsec) work.pauseNS += now - work.pauseStart work.tEnd = now + memstats.gcPauseDist.record(now - work.pauseStart) atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) @@ -1827,19 +1821,25 @@ func gcMarkTermination(nextTriggerRatio float64) { } } -// gcBgMarkStartWorkers prepares background mark worker goroutines. -// These goroutines will not run until the mark phase, but they must -// be started while the work is not stopped and from a regular G -// stack. The caller must hold worldsema. +// gcBgMarkStartWorkers prepares background mark worker goroutines. These +// goroutines will not run until the mark phase, but they must be started while +// the world is not stopped and from a regular G stack. The caller must hold +// worldsema. func gcBgMarkStartWorkers() { - // Background marking is performed by per-P G's. Ensure that - // each P has a background GC G. - for _, p := range allp { - if p.gcBgMarkWorker == 0 { - go gcBgMarkWorker(p) - notetsleepg(&work.bgMarkReady, -1) - noteclear(&work.bgMarkReady) - } + // Background marking is performed by per-P G's. Ensure that each P has + // a background GC G. + // + // Worker Gs don't exit if gomaxprocs is reduced. If it is raised + // again, we can reuse the old workers; no need to create new workers. + for gcBgMarkWorkerCount < gomaxprocs { + go gcBgMarkWorker() + + notetsleepg(&work.bgMarkReady, -1) + noteclear(&work.bgMarkReady) + // The worker is now guaranteed to be added to the pool before + // its P's next findRunnableGCWorker.
+ + gcBgMarkWorkerCount++ } } @@ -1859,82 +1859,104 @@ func gcBgMarkPrepare() { work.nwait = ^uint32(0) } -func gcBgMarkWorker(_p_ *p) { +// gcBgMarkWorkerNode is an entry in the gcBgMarkWorkerPool. It points to a single +// gcBgMarkWorker goroutine. +type gcBgMarkWorkerNode struct { + // Unused workers are managed in a lock-free stack. This field must be first. + node lfnode + + // The g of this worker. + gp guintptr + + // Release this m on park. This is used to communicate with the unlock + // function, which cannot access the G's stack. It is unused outside of + // gcBgMarkWorker(). + m muintptr +} + +func gcBgMarkWorker() { gp := getg() - type parkInfo struct { - m muintptr // Release this m on park. - attach puintptr // If non-nil, attach to this p on park. - } - // We pass park to a gopark unlock function, so it can't be on + // We pass node to a gopark unlock function, so it can't be on // the stack (see gopark). Prevent deadlock from recursively // starting GC by disabling preemption. gp.m.preemptoff = "GC worker init" - park := new(parkInfo) + node := new(gcBgMarkWorkerNode) gp.m.preemptoff = "" - park.m.set(acquirem()) - park.attach.set(_p_) - // Inform gcBgMarkStartWorkers that this worker is ready. - // After this point, the background mark worker is scheduled - // cooperatively by gcController.findRunnable. Hence, it must - // never be preempted, as this would put it into _Grunnable - // and put it on a run queue. Instead, when the preempt flag - // is set, this puts itself into _Gwaiting to be woken up by - // gcController.findRunnable at the appropriate time. + node.gp.set(gp) + + node.m.set(acquirem()) notewakeup(&work.bgMarkReady) + // After this point, the background mark worker is generally scheduled + // cooperatively by gcController.findRunnableGCWorker. While performing + // work on the P, preemption is disabled because we are working on + // P-local work buffers.
When the preempt flag is set, this puts itself + // into _Gwaiting to be woken up by gcController.findRunnableGCWorker + // at the appropriate time. + // + // When preemption is enabled (e.g., while in gcMarkDone), this worker + // may be preempted and scheduled as a _Grunnable G from a runq. That is + // fine; it will eventually gopark again for further scheduling via + // findRunnableGCWorker. + // + // Since we disable preemption before notifying bgMarkReady, we + // guarantee that this G will be in the worker pool for the next + // findRunnableGCWorker. This isn't strictly necessary, but it reduces + // latency between _GCmark starting and the workers starting. for { - // Go to sleep until woken by gcController.findRunnable. - // We can't releasem yet since even the call to gopark - // may be preempted. - gopark(func(g *g, parkp unsafe.Pointer) bool { - park := (*parkInfo)(parkp) - - // The worker G is no longer running, so it's - // now safe to allow preemption. - releasem(park.m.ptr()) + // Go to sleep until woken by + // gcController.findRunnableGCWorker. + gopark(func(g *g, nodep unsafe.Pointer) bool { + node := (*gcBgMarkWorkerNode)(nodep) - // If the worker isn't attached to its P, - // attach now. During initialization and after - // a phase change, the worker may have been - // running on a different P. As soon as we - // attach, the owner P may schedule the - // worker, so this must be done after the G is - // stopped. - if park.attach != 0 { - p := park.attach.ptr() - park.attach.set(nil) - // cas the worker because we may be - // racing with a new worker starting - // on this P. - if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) { - // The P got a new worker. - // Exit this worker. - return false - } + if mp := node.m.ptr(); mp != nil { + // The worker G is no longer running; release + // the M. + // + // N.B. it is _safe_ to release the M as soon + // as we are no longer performing P-local mark + // work.
+ // + // However, since we cooperatively stop work + // when gp.preempt is set, if we releasem in + // the loop then the following call to gopark + // would immediately preempt the G. This is + // also safe, but inefficient: the G must + // schedule again only to enter gopark and park + // again. Thus, we defer the release until + // after parking the G. + releasem(mp) } + + // Release this G to the pool. + gcBgMarkWorkerPool.push(&node.node) + // Note that at this point, the G may immediately be + // rescheduled and may be running. return true - }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) + }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) - // Loop until the P dies and disassociates this - // worker (the P may later be reused, in which case - // it will get a new worker) or we failed to associate. - if _p_.gcBgMarkWorker.ptr() != gp { - break - } + // Preemption must not occur here, or another G might see + // p.gcMarkWorkerMode. // Disable preemption so we can use the gcw. If the // scheduler wants to preempt us, we'll stop draining, // dispose the gcw, and then preempt. - park.m.set(acquirem()) + node.m.set(acquirem()) + pp := gp.m.p.ptr() // P can't change with preemption disabled. if gcBlackenEnabled == 0 { + println("worker mode", pp.gcMarkWorkerMode) throw("gcBgMarkWorker: blackening not enabled") } + if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker { + throw("gcBgMarkWorker: mode not set") + } + startTime := nanotime() - _p_.gcMarkWorkerStartTime = startTime + pp.gcMarkWorkerStartTime = startTime decnwait := atomic.Xadd(&work.nwait, -1) if decnwait == work.nproc { @@ -1951,11 +1973,11 @@ func gcBgMarkWorker(_p_ *p) { // disabled for mark workers, so it is safe to // read from the G stack. 
casgstatus(gp, _Grunning, _Gwaiting) - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { default: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") case gcMarkWorkerDedicatedMode: - gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) if gp.preempt { // We were preempted. This is // a useful signal to kick @@ -1964,7 +1986,7 @@ func gcBgMarkWorker(_p_ *p) { // somewhere else. lock(&sched.lock) for { - gp, _ := runqget(_p_) + gp, _ := runqget(pp) if gp == nil { break } @@ -1974,24 +1996,24 @@ func gcBgMarkWorker(_p_ *p) { } // Go back to draining, this time // without preemption. - gcDrain(&_p_.gcw, gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: - gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: - gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) } casgstatus(gp, _Gwaiting, _Grunning) }) // Account for time. duration := nanotime() - startTime - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { case gcMarkWorkerDedicatedMode: atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) + atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } @@ -2000,31 +2022,27 @@ func gcBgMarkWorker(_p_ *p) { // of work? 
incnwait := atomic.Xadd(&work.nwait, +1) if incnwait > work.nproc { - println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode, + println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, "work.nwait=", incnwait, "work.nproc=", work.nproc) throw("work.nwait > work.nproc") } + // We'll releasem after this point and thus this P may run + // something else. We must clear the worker mode to avoid + // attributing the mode to a different (non-worker) G in + // traceGoStart. + pp.gcMarkWorkerMode = gcMarkWorkerNotWorker + // If this worker reached a background mark completion // point, signal the main GC goroutine. if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // Make this G preemptible and disassociate it - // as the worker for this P so - // findRunnableGCWorker doesn't try to - // schedule it. - _p_.gcBgMarkWorker.set(nil) - releasem(park.m.ptr()) + // We don't need the P-local buffers here, allow + // preemption because we may schedule like a regular + // goroutine in gcMarkDone (block on locks, etc). + releasem(node.m.ptr()) + node.m.set(nil) gcMarkDone() - - // Disable preemption and prepare to reattach - // to the P. - // - // We may be running on a different P at this - // point, so we can't reattach until this G is - // parked. - park.m.set(acquirem()) - park.attach.set(_p_) } } } @@ -2085,7 +2103,7 @@ func gcMark(start_time int64) { // ensured all reachable objects were marked, all of // these must be pointers to black objects. Hence we // can just discard the write barrier buffer. - if debug.gccheckmark > 0 || throwOnGCWork { + if debug.gccheckmark > 0 { // For debugging, flush the buffer and make // sure it really was all marked. wbBufFlush1(p) @@ -2117,13 +2135,21 @@ func gcMark(start_time int64) { gcw.dispose() } - throwOnGCWork = false - - cachestats() - // Update the marked heap stat. memstats.heap_marked = work.bytesMarked + // Flush scanAlloc from each mcache since we're about to modify + // heap_scan directly.
If we were to flush this later, then scanAlloc + // might have incorrect information. + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + memstats.heap_scan += uint64(c.scanAlloc) + c.scanAlloc = 0 + } + // Update other GC heap size stats. This must happen after // cachestats (which flushes local statistics to these) and // flushallmcaches (which modifies heap_live). @@ -2142,6 +2168,8 @@ func gcMark(start_time int64) { // //go:systemstack func gcSweep(mode gcMode) { + assertWorldStopped() + if gcphase != _GCoff { throw("gcSweep being done but phase is not GCoff") } |
