From 2517f4946b42b8deedb864c884f1b41311d45850 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 14 Oct 2020 17:18:27 -0400 Subject: runtime: remove debugCachedWork debugCachedWork and all of its dependent fields and code were added to aid in debugging issue #27993. Now that the source of the problem is known and mitigated (via the extra work check after STW in gcMarkDone), these extra checks are no longer required and simply make the code more difficult to follow. Remove it all. Updates #27993 Change-Id: I594beedd5ca61733ba9cc9eaad8f80ea92df1a0d Reviewed-on: https://go-review.googlesource.com/c/go/+/262350 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements --- src/runtime/mgc.go | 121 +++++++++++------------------------------------------ 1 file changed, 24 insertions(+), 97 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index bd87144355..0a4d5616a5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1407,19 +1407,6 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 -// debugCachedWork enables extra checks for debugging premature mark -// termination. -// -// For debugging issue #27993. -const debugCachedWork = false - -// gcWorkPauseGen is for debugging the mark completion algorithm. -// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. -// Only used if debugCachedWork is true. -// -// For debugging issue #27993. -var gcWorkPauseGen uint32 = 1 - // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1475,15 +1462,7 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) - // For debugging, shrink the write barrier - // buffer so it flushes immediately. - // wbBuf.reset will keep it at this size as - // long as throwOnGCWork is set. - if debugCachedWork { - b := &_p_.wbBuf - b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) - b.debugGen = gcWorkPauseGen - } + // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1494,29 +1473,12 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false - } else if debugCachedWork { - // For debugging, freeze the gcWork - // until we know whether we've reached - // completion or not. If we think - // we've reached completion, but - // there's a paused gcWork, then - // that's a bug. - _p_.gcw.pauseGen = gcWorkPauseGen - // Capture the G's stack. - for i := range _p_.gcw.pauseStack { - _p_.gcw.pauseStack[i] = 0 - } - callers(1, _p_.gcw.pauseStack[:]) } }) casgstatus(gp, _Gwaiting, _Grunning) }) if gcMarkDoneFlushed != 0 { - if debugCachedWork { - // Release paused gcWorks. - atomic.Xadd(&gcWorkPauseGen, 1) - } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1526,13 +1488,6 @@ top: goto top } - if debugCachedWork { - throwOnGCWork = true - // Release paused gcWorks. If there are any, they - // should now observe throwOnGCWork and panic. - atomic.Xadd(&gcWorkPauseGen, 1) - } - // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be @@ -1549,59 +1504,33 @@ top: // below. 
The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. - if debugCachedWork { - // For debugging, double check that no work was added after we - // went around above and disable write barrier buffering. + // There is sometimes work left over when we enter mark termination due + // to write barriers performed after the completion barrier above. + // Detect this and resume concurrent mark. This is obviously + // unfortunate. + // + // See issue #27993 for details. + // + // Switch to the system stack to call wbBufFlush1, though in this case + // it doesn't matter because we're non-preemptible anyway. + restart := false + systemstack(func() { for _, p := range allp { - gcw := &p.gcw - if !gcw.empty() { - printlock() - print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) - if gcw.wbuf1 == nil { - print(" wbuf1=") - } else { - print(" wbuf1.n=", gcw.wbuf1.nobj) - } - if gcw.wbuf2 == nil { - print(" wbuf2=") - } else { - print(" wbuf2.n=", gcw.wbuf2.nobj) - } - print("\n") - if gcw.pauseGen == gcw.putGen { - println("runtime: checkPut already failed at this generation") - } - throw("throwOnGCWork") + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break } } - } else { - // For unknown reasons (see issue #27993), there is - // sometimes work left over when we enter mark - // termination. Detect this and resume concurrent - // mark. This is obviously unfortunate. - // - // Switch to the system stack to call wbBufFlush1, - // though in this case it doesn't matter because we're - // non-preemptible anyway. - restart := false + }) + if restart { + getg().m.preemptoff = "" systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } + now := startTheWorldWithSema(true) + work.pauseNS += now - work.pauseStart }) - if restart { - getg().m.preemptoff = "" - systemstack(func() { - now := startTheWorldWithSema(true) - work.pauseNS += now - work.pauseStart - }) - semrelease(&worldsema) - goto top - } + semrelease(&worldsema) + goto top } // Disable assists and background workers. We must do @@ -2085,7 +2014,7 @@ func gcMark(start_time int64) { // ensured all reachable objects were marked, all of // these must be pointers to black objects. Hence we // can just discard the write barrier buffer. - if debug.gccheckmark > 0 || throwOnGCWork { + if debug.gccheckmark > 0 { // For debugging, flush the buffer and make // sure it really was all marked. wbBufFlush1(p) @@ -2117,8 +2046,6 @@ func gcMark(start_time int64) { gcw.dispose() } - throwOnGCWork = false - cachestats() // Update the marked heap stat. -- cgit v1.3 From e313fd7448ed0dabf98dc725bee2361e905f208b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 9 Jul 2020 16:20:48 -0400 Subject: runtime: drop unused work.ndone field This field is unused since golang.org/cl/134785 and thus can be trivially removed. Change-Id: I1a87f8e78ffdf662440409404f0251c40bc56a4f Reviewed-on: https://go-review.googlesource.com/c/go/+/241741 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 1 - 1 file changed, 1 deletion(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 0a4d5616a5..65ac654b14 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -983,7 +983,6 @@ var work struct { nproc uint32 tstart int64 nwait uint32 - ndone uint32 // Number of roots of various root types. Set by gcMarkRootPrepare. 
nFlushCacheRoots int -- cgit v1.3 From db185e543fe471c522790b7d93291e786dc54a84 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 7 Jul 2020 17:55:40 -0400 Subject: runtime: drop redundant gcBlackenEnabled reset This reset of gcBlackenEnabled is a no-op because it was already reset almost immediately before in gcMarkDone, which is the only caller of gcMarkTermination. Adjust the comment to clarify setGCPhase a bit more. We are coming from _GCmark, so write barriers are already enabled. Change-Id: Ieac2dadf33c3c5a44e8a25a499dea8cfe03b8d73 Reviewed-on: https://go-review.googlesource.com/c/go/+/241357 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Trust: Michael Pratt Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 65ac654b14..c42c7fbd29 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1558,10 +1558,10 @@ top: gcMarkTermination(nextTriggerRatio) } +// World must be stopped and mark assists and background workers must be +// disabled. func gcMarkTermination(nextTriggerRatio float64) { - // World is stopped. - // Start marktermination which includes enabling the write barrier. - atomic.Store(&gcBlackenEnabled, 0) + // Start marktermination (write barrier remains enabled for now). setGCPhase(_GCmarktermination) work.heap1 = memstats.heap_live -- cgit v1.3 From 8cc280aa727bc7159adfdd083861472aa3066a35 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:13:49 +0000 Subject: runtime: define and enforce synchronization on heap_scan Currently heap_scan is mostly protected by the heap lock, but gcControllerState.revise sometimes accesses it without a lock. In an effort to make gcControllerState.revise callable from more contexts (and have its synchronization guarantees actually respected), make heap_scan atomically read from and written to, unless the world is stopped. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. Change-Id: Iddbbeb954767c704c2bd1d221f36e6c4fc9948a6 Reviewed-on: https://go-review.googlesource.com/c/go/+/246960 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Emmanuel Odeke Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- src/runtime/mheap.go | 4 ++-- src/runtime/mstats.go | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c42c7fbd29..94539dd770 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -494,6 +494,7 @@ func (c *gcControllerState) revise() { gcpercent = 100000 } live := atomic.Load64(&memstats.heap_live) + scan := atomic.Load64(&memstats.heap_scan) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -508,7 +509,7 @@ func (c *gcControllerState) revise() { // // (This is a float calculation to avoid overflowing on // 100*heap_scan.) 
- scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) if live > memstats.next_gc || c.scanWork > scanWorkExpected { // We're past the soft goal, or we've already done more scan @@ -518,7 +519,7 @@ func (c *gcControllerState) revise() { heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) // Compute the upper bound on the scan work remaining. - scanWorkExpected = int64(memstats.heap_scan) + scanWorkExpected = int64(scan) } // Compute the remaining scan work estimate. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1a57bcd66e..124bbacd1d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1168,7 +1168,7 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS throw("mheap.allocSpan called with no P") } } - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 @@ -1375,7 +1375,7 @@ func (h *mheap) freeSpan(s *mspan) { systemstack(func() { c := getg().m.p.ptr().mcache lock(&h.lock) - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index b95b332134..2c217ecf84 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -139,6 +139,8 @@ type mstats struct { // no-scan objects and no-scan tails of objects. // // Whenever this is updated, call gcController.revise(). + // + // Read and written atomically or with the world stopped. heap_scan uint64 // heap_marked is the number of bytes marked by the previous @@ -635,7 +637,7 @@ func flushallmcaches() { func purgecachedstats(c *mcache) { // Protected by either heap or GC lock. h := &mheap_ - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 -- cgit v1.3 From 93d7d1685ee9e9f296e20f6c712796e54602e891 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:17:40 +0000 Subject: runtime: load gcControllerState.scanWork atomically in revise gcControllerState.scanWork's docs state that it must be accessed atomically during a GC cycle, but gcControllerState.revise does not do this (even when called with the heap lock held). This change makes it so that gcControllerState.revise accesses scanWork atomically and explicitly. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. 
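Taken together, this change and the previous one establish a single pattern in the pacer: snapshot every shared statistic with an atomic load first, then do all the floating-point arithmetic on the local copies. The standalone sketch below illustrates that shape only and is not the runtime's code; pacerState, its field names, and the 1000-unit floor are stand-ins for gcControllerState, heap_scan, scanWork, and the real revise logic.

package main

import (
	"fmt"
	"sync/atomic"
)

// pacerState stands in for the parts of gcControllerState and memstats that
// revise reads; both counters are updated atomically elsewhere.
type pacerState struct {
	heapScan  uint64 // bytes of scannable heap allocated so far
	scanWork  int64  // scan work completed in the current cycle
	gcPercent int64
}

// remainingScanWork mirrors the shape of revise: snapshot the racy inputs
// with atomic loads, then compute on the local copies.
func (p *pacerState) remainingScanWork() int64 {
	scan := atomic.LoadUint64(&p.heapScan)
	done := atomic.LoadInt64(&p.scanWork)

	// Float math avoids overflowing 100*heapScan on very large heaps.
	expected := int64(float64(scan) * 100 / float64(100+p.gcPercent))

	remaining := expected - done
	if remaining < 1000 {
		remaining = 1000 // arbitrary lower bound, as in the real pacer
	}
	return remaining
}

func main() {
	p := &pacerState{gcPercent: 100}
	atomic.StoreUint64(&p.heapScan, 64<<20)
	atomic.StoreInt64(&p.scanWork, 8<<20)
	fmt.Println("remaining scan work:", p.remainingScanWork())
}
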
Change-Id: Iafc3ad214e517190bfd8a219896d23da19f7659d Reviewed-on: https://go-review.googlesource.com/c/go/+/246961 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 94539dd770..4b9a6da3b3 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -495,6 +495,7 @@ func (c *gcControllerState) revise() { } live := atomic.Load64(&memstats.heap_live) scan := atomic.Load64(&memstats.heap_scan) + work := atomic.Loadint64(&c.scanWork) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -511,7 +512,7 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || c.scanWork > scanWorkExpected { + if live > memstats.next_gc || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. @@ -529,7 +530,7 @@ func (c *gcControllerState) revise() { // (scanWork), so allocation will change this difference // slowly in the soft regime and not at all in the hard // regime. - scanWorkRemaining := scanWorkExpected - c.scanWork + scanWorkRemaining := scanWorkExpected - work if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, -- cgit v1.3 From f5c6875f3228951afa1fcf2ec01c614e0fb7e2dd Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:24:56 +0000 Subject: runtime: make next_gc atomically accessed next_gc is mostly updated only during a STW, but may occasionally be updated by calls to e.g. debug.SetGCPercent. In this case the update is supposed to be protected by the heap lock, but in reality it's accessed by gcController.revise which may be called without the heap lock held (despite its documentation, which will be updated in a later change). Change the synchronization policy on next_gc so that it's atomically accessed when the world is not stopped to aid in making revise safe for concurrent use. Change-Id: I79657a72f91563f3241aaeda66e8a7757d399529 Reviewed-on: https://go-review.googlesource.com/c/go/+/246962 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 13 +++++++------ src/runtime/mgcscavenge.go | 2 +- src/runtime/mstats.go | 10 ++++++++-- src/runtime/trace.go | 5 +++-- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 4b9a6da3b3..5c565a5853 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -409,7 +409,8 @@ type gcControllerState struct { } // startCycle resets the GC controller's state and computes estimates -// for a new GC cycle. The caller must hold worldsema. +// for a new GC cycle. The caller must hold worldsema and the world +// must be stopped. func (c *gcControllerState) startCycle() { c.scanWork = 0 c.bgScanCredit = 0 @@ -499,7 +500,7 @@ func (c *gcControllerState) revise() { // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. 
- heapGoal := int64(memstats.next_gc) + heapGoal := int64(atomic.Load64(&memstats.next_gc)) // Compute the expected scan work remaining. // @@ -512,12 +513,12 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || work > scanWorkExpected { + if int64(live) > heapGoal || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. const maxOvershoot = 1.1 - heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + heapGoal = int64(float64(heapGoal) * maxOvershoot) // Compute the upper bound on the scan work remaining. scanWorkExpected = int64(scan) @@ -846,7 +847,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // Commit to the trigger and goal. memstats.gc_trigger = trigger - memstats.next_gc = goal + atomic.Store64(&memstats.next_gc, goal) if trace.enabled { traceNextGC() } @@ -903,7 +904,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { - egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. egogc = 0 diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 34646828e5..6328b295ca 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -123,7 +123,7 @@ func gcPaceScavenger() { return } // Compute our scavenging goal. - goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc) + goalRatio := float64(atomic.Load64(&memstats.next_gc)) / float64(memstats.last_next_gc) retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio) // Add retainExtraPercent overhead to retainedGoal. This calculation // looks strange but the purpose is to arrive at an integer division diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 2c217ecf84..8cc20552fb 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -57,9 +57,15 @@ type mstats struct { gc_sys uint64 // updated atomically or during STW other_sys uint64 // updated atomically or during STW - // Statistics about garbage collector. + // Statistics about the garbage collector. + + // next_gc is the goal heap_live for when next GC ends. + // Set to ^uint64(0) if disabled. + // + // Read and written atomically, unless the world is stopped. + next_gc uint64 + // Protected by mheap or stopping the world during GC. - next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled last_gc_unix uint64 // last gc (in unix time) pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 169b650eb4..d3ecd148be 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -13,6 +13,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -1146,11 +1147,11 @@ func traceHeapAlloc() { } func traceNextGC() { - if memstats.next_gc == ^uint64(0) { + if nextGC := atomic.Load64(&memstats.next_gc); nextGC == ^uint64(0) { // Heap-based triggering is disabled. 
traceEvent(traceEvNextGC, -1, 0) } else { - traceEvent(traceEvNextGC, -1, memstats.next_gc) + traceEvent(traceEvNextGC, -1, nextGC) } } -- cgit v1.3 From ce46f197b6c75281b77ee93338e2559671e28b01 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:48:06 +0000 Subject: runtime: access the assist ratio atomically This change makes it so that the GC assist ratio (the pair of gcControllerState fields assistBytesPerWork and assistWorkPerByte) is updated atomically. Note that the pair of fields are not updated together atomically, but that's OK. The code here was already racy for some time and in practice the assist ratio moves very slowly. The purpose of this change is so that we can document gcController.revise to be safe for concurrent use, which will be useful in further changes. Change-Id: Ie25d630207c88e4f85f2b8953f6a0051ebf1b4ea Reviewed-on: https://go-review.googlesource.com/c/go/+/246963 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 51 ++++++++++++++++++++++++++++++++++++++++++-------- src/runtime/mgcmark.go | 17 +++++++++++------ src/runtime/proc.go | 3 ++- 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 5c565a5853..c54f893689 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -388,10 +388,24 @@ type gcControllerState struct { // bytes that should be performed by mutator assists. This is // computed at the beginning of each cycle and updated every // time heap_scan is updated. - assistWorkPerByte float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + assistWorkPerByte uint64 // assistBytesPerWork is 1/assistWorkPerByte. - assistBytesPerWork float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + // + // Note that because this is read and written independently + // from assistWorkPerByte users may notice a skew between + // the two values, and such a state should be safe. + assistBytesPerWork uint64 // fractionalUtilizationGoal is the fraction of wall clock // time that should be spent in the fractional mark worker on @@ -470,7 +484,8 @@ func (c *gcControllerState) startCycle() { c.revise() if debug.gcpacertrace > 0 { - print("pacer: assist ratio=", c.assistWorkPerByte, + assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte)) + print("pacer: assist ratio=", assistRatio, " (scan ", memstats.heap_scan>>20, " MB in ", work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", @@ -480,9 +495,22 @@ func (c *gcControllerState) startCycle() { } // revise updates the assist ratio during the GC cycle to account for -// improved estimates. This should be called either under STW or -// whenever memstats.heap_scan, memstats.heap_live, or -// memstats.next_gc is updated (with mheap_.lock held). +// improved estimates. This should be called whenever memstats.heap_scan, +// memstats.heap_live, or memstats.next_gc is updated. It is safe to +// call concurrently, but it may race with other calls to revise. +// +// The result of this race is that the two assist ratio values may not line +// up or may be stale. In practice this is OK because the assist ratio +// moves slowly throughout a GC cycle, and the assist ratio is a best-effort +// heuristic anyway. 
Furthermore, no part of the heuristic depends on +// the two assist ratio values being exact reciprocals of one another, since +// the two values are used to convert values from different sources. +// +// The worst case result of this raciness is that we may miss a larger shift +// in the ratio (say, if we decide to pace more aggressively against the +// hard heap goal) but even this "hard goal" is best-effort (see #40460). +// The dedicated GC should ensure we don't exceed the hard goal by too much +// in the rare case we do exceed it. // // It should only be called when gcBlackenEnabled != 0 (because this // is when assists are enabled and the necessary statistics are @@ -555,8 +583,15 @@ func (c *gcControllerState) revise() { // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) - c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) + // Note that the assist ratio values are updated atomically + // but not together. This means there may be some degree of + // skew between the two values. This is generally OK as the + // values shift relatively slowly over the course of a GC + // cycle. + assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining) + assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining) + atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte)) + atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork)) } // endCycle computes the trigger ratio for the next cycle. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 79df59d6d6..c71c0e58d3 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -400,11 +400,13 @@ retry: // balance positive. When the required amount of work is low, // we over-assist to build up credit for future allocations // and amortize the cost of assisting. + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) debtBytes := -gp.gcAssistBytes - scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes)) + scanWork := int64(assistWorkPerByte * float64(debtBytes)) if scanWork < gcOverAssistWork { scanWork = gcOverAssistWork - debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork)) + debtBytes = int64(assistBytesPerWork * float64(scanWork)) } // Steal as much credit as we can from the background GC's @@ -418,7 +420,7 @@ retry: if bgScanCredit > 0 { if bgScanCredit < scanWork { stolen = bgScanCredit - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen)) } else { stolen = scanWork gp.gcAssistBytes += debtBytes @@ -543,7 +545,8 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // this scan work counts for. The "1+" is a poor man's // round-up, to ensure this adds credit even if // assistBytesPerWork is very low. - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone)) // If this is the last worker and we ran out of work, // signal a completion point. 
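The bit-conversion trick used in this patch is easy to lift out on its own. The sketch below is illustrative rather than the runtime's implementation: it wraps a float64 in a small type whose loads and stores go through math.Float64bits and math.Float64frombits over an atomically accessed uint64, which is the same shape as assistWorkPerByte and assistBytesPerWork above.

package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

// atomicFloat64 keeps a float64 as its IEEE-754 bit pattern so that plain
// 8-byte atomics can be used; Go has no atomic float64 primitive.
type atomicFloat64 struct{ bits uint64 }

func (a *atomicFloat64) Store(v float64) {
	atomic.StoreUint64(&a.bits, math.Float64bits(v))
}

func (a *atomicFloat64) Load() float64 {
	return math.Float64frombits(atomic.LoadUint64(&a.bits))
}

func main() {
	var assistWorkPerByte atomicFloat64 // name borrowed from the patch
	assistWorkPerByte.Store(0.25)
	fmt.Println(assistWorkPerByte.Load())
}

As the commit message notes, the two ratios are updated independently, so a reader may observe values from different updates; the pacer tolerates that skew because the assist ratio drifts slowly over a cycle.
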
@@ -637,7 +640,8 @@ func gcFlushBgCredit(scanWork int64) { return } - scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + scanBytes := int64(float64(scanWork) * assistBytesPerWork) lock(&work.assistQueue.lock) for !work.assistQueue.q.empty() && scanBytes > 0 { @@ -670,7 +674,8 @@ func gcFlushBgCredit(scanWork int64) { if scanBytes > 0 { // Convert from scan bytes back to work. - scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanWork = int64(float64(scanBytes) * assistWorkPerByte) atomic.Xaddint64(&gcController.bgScanCredit, scanWork) } unlock(&work.assistQueue.lock) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec4e6d8751..ebecc92745 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3208,7 +3208,8 @@ func goexit0(gp *g) { // Flush assist credit to the global pool. This gives // better information to pacing if the application is // rapidly creating an exiting goroutines. - scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes)) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes)) atomic.Xaddint64(&gcController.bgScanCredit, scanCredit) gp.gcAssistBytes = 0 } -- cgit v1.3 From d677899e903c4741920846f1af2c14c56f6e710e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:36:58 +0000 Subject: runtime: flush local_scan directly and more often Now that local_scan is the last mcache-based statistic that is flushed by purgecachedstats, and heap_scan and gcController.revise may be interacted with concurrently, we don't need to flush heap_scan at arbitrary locations where the heap is locked, and we don't need purgecachedstats and cachestats anymore. Instead, we can flush local_scan at the same time we update heap_live in refill, so the two updates may share the same revise call. Clean up unused functions, remove code that would cause the heap to get locked in the allocSpan when it didn't need to (other than to flush local_scan), and flush local_scan explicitly in a few important places. Notably we need to flush local_scan whenever we flush the other stats, but it doesn't need to be donated anywhere, so have releaseAll do the flushing. Also, we need to flush local_scan before we set heap_scan at the end of a GC, which was previously handled by cachestats. Just do so explicitly -- it's not much code and it becomes a lot more clear why we need to do so. Change-Id: I35ac081784df7744d515479896a41d530653692d Reviewed-on: https://go-review.googlesource.com/c/go/+/246968 Run-TryBot: Michael Knyszek Trust: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mcache.go | 22 ++++++++++++++++++++-- src/runtime/mgc.go | 14 ++++++++++++-- src/runtime/mheap.go | 49 +++---------------------------------------------- src/runtime/mstats.go | 25 ------------------------- 4 files changed, 35 insertions(+), 75 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index fe603116a2..b8e388cc4f 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -124,7 +124,6 @@ func freemcache(c *mcache, recipient *mcache) { // gcworkbuffree(c.gcworkbuf) lock(&mheap_.lock) - purgecachedstats(c) // Donate anything else that's left. 
c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) @@ -135,6 +134,8 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { + // local_scan is handled separately because it's not + // like these stats -- it's used for GC pacing. d.local_largealloc += c.local_largealloc c.local_largealloc = 0 d.local_nlargealloc += c.local_nlargealloc @@ -192,14 +193,22 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + + // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + + // While we're here, flush local_scan, since we have to call + // revise anyway. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + if trace.enabled { // heap_live changed. traceHeapAlloc() } if gcBlackenEnabled != 0 { - // heap_live changed. + // heap_live and heap_scan changed. gcController.revise() } @@ -248,6 +257,10 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { + // Take this opportunity to flush local_scan. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + sg := mheap_.sweepgen for i := range c.alloc { s := c.alloc[i] @@ -273,6 +286,11 @@ func (c *mcache) releaseAll() { // Clear tinyalloc pool. c.tiny = 0 c.tinyoffset = 0 + + // Updated heap_scan and possible heap_live. + if gcBlackenEnabled != 0 { + gcController.revise() + } } // prepareForSweep flushes c if the system has entered a new sweep phase diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c54f893689..55554c117c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2083,11 +2083,21 @@ func gcMark(start_time int64) { gcw.dispose() } - cachestats() - // Update the marked heap stat. memstats.heap_marked = work.bytesMarked + // Flush local_scan from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then local_scan + // might have incorrect information. + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + memstats.heap_scan += uint64(c.local_scan) + c.local_scan = 0 + } + // Update other GC heap size stats. This must happen after // cachestats (which flushes local statistics to these) and // flushallmcaches (which modifies heap_live). diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 47f86ee38c..40fd58b0ef 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1102,23 +1102,11 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS base, scav = c.alloc(npages) if base != 0 { s = h.tryAllocMSpan() - - if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) { + if s != nil { goto HaveSpan } - // We're either running duing GC, failed to acquire a mspan, - // or the allocation is for a large object. This means we - // have to lock the heap and do a bunch of extra work, - // so go down the HaveBaseLocked path. - // - // We must do this during GC to avoid skew with heap_scan - // since we flush mcache stats whenever we lock. - // - // TODO(mknyszek): It would be nice to not have to - // lock the heap if it's a large allocation, but - // it's fine for now. 
The critical section here is - // short and large object allocations are relatively - // infrequent. + // We have a base but no mspan, so we need + // to lock the heap. } } @@ -1145,30 +1133,6 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS // one now that we have the heap lock. s = h.allocMSpanLocked() } - if !manual { - // This is a heap span, so we should do some additional accounting - // which may only be done with the heap locked. - - // Transfer stats from mcache to global. - var c *mcache - if gp.m.p != 0 { - c = gp.m.p.ptr().mcache - } else { - // This case occurs while bootstrapping. - // See the similar code in mallocgc. - c = mcache0 - if c == nil { - throw("mheap.allocSpan called with no P") - } - } - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 - - // heap_scan was been updated. - if gcBlackenEnabled != 0 { - gcController.revise() - } - } unlock(&h.lock) HaveSpan: @@ -1352,20 +1316,13 @@ func (h *mheap) grow(npage uintptr) bool { // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan) { systemstack(func() { - c := getg().m.p.ptr().mcache lock(&h.lock) - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 if msanenabled { // Tell msan that this entire span is no longer in use. base := unsafe.Pointer(s.base()) bytes := s.npages << _PageShift msanfree(base, bytes) } - if gcBlackenEnabled != 0 { - // heap_scan changed. - gcController.revise() - } h.freeSpanLocked(s, true, true) unlock(&h.lock) }) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 341906fced..5eeb173640 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -556,9 +556,6 @@ func updatememstats() { memstats.by_size[i].nfree = 0 } - // Aggregate local stats. - cachestats() - // Collect allocation stats. This is safe and consistent // because the world is stopped. var smallFree, totalAlloc, totalFree uint64 @@ -602,21 +599,6 @@ func updatememstats() { memstats.heap_objects = memstats.nmalloc - memstats.nfree } -// cachestats flushes all mcache stats. -// -// The world must be stopped. -// -//go:nowritebarrier -func cachestats() { - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - purgecachedstats(c) - } -} - // flushmcache flushes the mcache of allp[i]. // // The world must be stopped. @@ -643,13 +625,6 @@ func flushallmcaches() { } } -//go:nosplit -func purgecachedstats(c *mcache) { - // Protected by heap lock. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 -} - // Atomically increases a given *system* memory stat. We are counting on this // stat never overflowing a uintptr, so this function must only be used for // system memory stats. -- cgit v1.3 From c8638498008f9874dc5a48734418e0fbea08cee9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 24 Jul 2020 19:58:31 +0000 Subject: runtime: rename mcache fields to match Go style This change renames a bunch of malloc statistics stored in the mcache that are all named with the "local_" prefix. It also renames largeAlloc to allocLarge to prevent a naming conflict, and next_sample because it would be the last mcache field with the old C naming style. 
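The mcache statistics renamed here are an instance of the pattern the previous commit leans on: each P accumulates a count locally with no synchronization, and only the occasional flush into the global total is atomic. Below is a rough user-level sketch of that pattern, with illustrative names rather than the runtime's, assuming each localStats value is owned by exactly one goroutine.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var globalScan uint64 // analogous to memstats.heap_scan; always updated atomically

// localStats plays the role of an mcache: owned by one worker, so scanAlloc
// needs no synchronization until it is flushed.
type localStats struct {
	scanAlloc uint64
}

// flush publishes the locally accumulated count and resets it, the way
// refill and releaseAll flush scanAlloc before the pacer reads heap_scan.
func (c *localStats) flush() {
	atomic.AddUint64(&globalScan, c.scanAlloc)
	c.scanAlloc = 0
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c := &localStats{}
			for j := 0; j < 1000; j++ {
				c.scanAlloc += 16 // cheap, unsynchronized local update
			}
			c.flush()
		}()
	}
	wg.Wait()
	fmt.Println("global scannable bytes:", atomic.LoadUint64(&globalScan))
}
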
Change-Id: I29695cb83b397a435ede7e9ad5c3c9be72767ea3 Reviewed-on: https://go-review.googlesource.com/c/go/+/246969 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 14 ++++---- src/runtime/malloc.go | 12 +++---- src/runtime/mcache.go | 78 ++++++++++++++++++++--------------------- src/runtime/mgc.go | 8 ++--- src/runtime/mgcsweep.go | 6 ++-- src/runtime/mstats.go | 22 ++++++------ src/runtime/pprof/mprof_test.go | 2 +- 7 files changed, 71 insertions(+), 71 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d71b180f76..47cbc286f6 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -346,18 +346,18 @@ func ReadMemStatsSlow() (base, slow MemStats) { continue } // Collect large allocation stats. - largeFree += uint64(c.local_largefree) - slow.Frees += uint64(c.local_nlargefree) + largeFree += uint64(c.largeFree) + slow.Frees += uint64(c.largeFreeCount) // Collect tiny allocation stats. - tinyAllocs += uint64(c.local_tinyallocs) + tinyAllocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { - slow.Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + slow.Frees += uint64(c.smallFreeCount[i]) + bySize[i].Frees += uint64(c.smallFreeCount[i]) + bySize[i].Mallocs += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } slow.Frees += tinyAllocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ec601ccb39..0f48d7f68e 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1040,7 +1040,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.local_tinyallocs++ + c.tinyAllocCount++ mp.mallocing = 0 releasem(mp) return x @@ -1082,7 +1082,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } } else { shouldhelpgc = true - span = c.largeAlloc(size, needzero, noscan) + span = c.allocLarge(size, needzero, noscan) span.freeindex = 1 span.allocCount = 1 x = unsafe.Pointer(span.base()) @@ -1111,7 +1111,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } else { scanSize = typ.ptrdata } - c.local_scan += scanSize + c.scanAlloc += scanSize } // Ensure that the stores above that initialize x to @@ -1153,8 +1153,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } if rate := MemProfileRate; rate > 0 { - if rate != 1 && size < c.next_sample { - c.next_sample -= size + if rate != 1 && size < c.nextSample { + c.nextSample -= size } else { mp := acquirem() profilealloc(mp, x, size) @@ -1221,7 +1221,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { throw("profilealloc called with no P") } } - c.next_sample = nextSample() + c.nextSample = nextSample() mProf_Malloc(x, size) } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index b8e388cc4f..c3e0e5e1f7 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -20,8 +20,8 @@ import ( type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. 
- next_sample uintptr // trigger heap sample after allocating this many bytes - local_scan uintptr // bytes of scannable heap allocated + nextSample uintptr // trigger heap sample after allocating this many bytes + scanAlloc uintptr // bytes of scannable heap allocated // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.go. @@ -48,13 +48,13 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. - local_tinyallocs uintptr // number of tiny allocs not counted in other stats - local_largealloc uintptr // bytes allocated for large objects - local_nlargealloc uintptr // number of large object allocations - local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects - local_largefree uintptr // bytes freed for large objects (>maxsmallsize) - local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) - local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + tinyAllocCount uintptr // number of tiny allocs not counted in other stats + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans @@ -103,7 +103,7 @@ func allocmcache() *mcache { for i := range c.alloc { c.alloc[i] = &emptymspan } - c.next_sample = nextSample() + c.nextSample = nextSample() return c } @@ -134,26 +134,26 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { - // local_scan is handled separately because it's not + // scanAlloc is handled separately because it's not // like these stats -- it's used for GC pacing. - d.local_largealloc += c.local_largealloc - c.local_largealloc = 0 - d.local_nlargealloc += c.local_nlargealloc - c.local_nlargealloc = 0 - for i := range c.local_nsmallalloc { - d.local_nsmallalloc[i] += c.local_nsmallalloc[i] - c.local_nsmallalloc[i] = 0 + d.largeAlloc += c.largeAlloc + c.largeAlloc = 0 + d.largeAllocCount += c.largeAllocCount + c.largeAllocCount = 0 + for i := range c.smallAllocCount { + d.smallAllocCount[i] += c.smallAllocCount[i] + c.smallAllocCount[i] = 0 } - d.local_largefree += c.local_largefree - c.local_largefree = 0 - d.local_nlargefree += c.local_nlargefree - c.local_nlargefree = 0 - for i := range c.local_nsmallfree { - d.local_nsmallfree[i] += c.local_nsmallfree[i] - c.local_nsmallfree[i] = 0 + d.largeFree += c.largeFree + c.largeFree = 0 + d.largeFreeCount += c.largeFreeCount + c.largeFreeCount = 0 + for i := range c.smallFreeCount { + d.smallFreeCount[i] += c.smallFreeCount[i] + c.smallFreeCount[i] = 0 } - d.local_tinyallocs += c.local_tinyallocs - c.local_tinyallocs = 0 + d.tinyAllocCount += c.tinyAllocCount + c.tinyAllocCount = 0 } // refill acquires a new span of span class spc for c. This span will @@ -192,16 +192,16 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. 
If it gets uncached, we'll adjust this. - c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) - // While we're here, flush local_scan, since we have to call + // While we're here, flush scanAlloc, since we have to call // revise anyway. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 if trace.enabled { // heap_live changed. @@ -215,8 +215,8 @@ func (c *mcache) refill(spc spanClass) { c.alloc[spc] = s } -// largeAlloc allocates a span for a large object. -func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { +// allocLarge allocates a span for a large object. +func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if size+_PageSize < size { throw("out of memory") } @@ -235,8 +235,8 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.local_largealloc += npages * pageSize - c.local_nlargealloc++ + c.largeAlloc += npages * pageSize + c.largeAllocCount++ // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -257,9 +257,9 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { - // Take this opportunity to flush local_scan. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + // Take this opportunity to flush scanAlloc. + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 sg := mheap_.sweepgen for i := range c.alloc { @@ -267,7 +267,7 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.local_nsmallalloc[spanClass(i).sizeclass()] -= n + c.smallAllocCount[spanClass(i).sizeclass()] -= n if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 55554c117c..540c376f1c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2086,16 +2086,16 @@ func gcMark(start_time int64) { // Update the marked heap stat. memstats.heap_marked = work.bytesMarked - // Flush local_scan from each mcache since we're about to modify - // heap_scan directly. If we were to flush this later, then local_scan + // Flush scanAlloc from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then scanAlloc // might have incorrect information. for _, p := range allp { c := p.mcache if c == nil { continue } - memstats.heap_scan += uint64(c.local_scan) - c.local_scan = 0 + memstats.heap_scan += uint64(c.scanAlloc) + c.scanAlloc = 0 } // Update other GC heap size stats. This must happen after diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 6b8c56ce35..7103b08455 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,7 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. 
s.needzero = 1 - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +548,8 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.local_nlargefree++ - c.local_largefree += size + c.largeFreeCount++ + c.largeFree += size return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 5eeb173640..64687c24e5 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -565,25 +565,25 @@ func updatememstats() { continue } // Collect large allocation stats. - memstats.nmalloc += uint64(c.local_nlargealloc) - totalAlloc += uint64(c.local_largealloc) - totalFree += uint64(c.local_largefree) - memstats.nfree += uint64(c.local_nlargefree) + memstats.nmalloc += uint64(c.largeAllocCount) + totalAlloc += uint64(c.largeAlloc) + totalFree += uint64(c.largeFree) + memstats.nfree += uint64(c.largeFreeCount) // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.local_tinyallocs) + memstats.tinyallocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. - memstats.nmalloc += uint64(c.local_nsmallalloc[i]) - memstats.by_size[i].nmalloc += uint64(c.local_nsmallalloc[i]) - totalAlloc += uint64(c.local_nsmallalloc[i]) * uint64(class_to_size[i]) + memstats.nmalloc += uint64(c.smallAllocCount[i]) + memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) + totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) // Free stats. - memstats.nfree += uint64(c.local_nsmallfree[i]) - memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + memstats.nfree += uint64(c.smallFreeCount[i]) + memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go index f253f07def..c11a45fd69 100644 --- a/src/runtime/pprof/mprof_test.go +++ b/src/runtime/pprof/mprof_test.go @@ -70,7 +70,7 @@ func TestMemoryProfiler(t *testing.T) { runtime.MemProfileRate = oldRate }() - // Allocate a meg to ensure that mcache.next_sample is updated to 1. + // Allocate a meg to ensure that mcache.nextSample is updated to 1. for i := 0; i < 1024; i++ { memSink = make([]byte, 1024) } -- cgit v1.3 From d39a89fd5843f535d634620d27110b320431f584 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 21:59:13 +0000 Subject: runtime,runtime/metrics: add metric for distribution of GC pauses For #37112. 
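For context, a program would consume the new histogram roughly as follows once this lands. The metric name comes from the patch; everything else is ordinary runtime/metrics usage, and the sketch assumes a toolchain that already ships this metric.

package main

import (
	"fmt"
	"runtime"
	"runtime/metrics"
)

func main() {
	const name = "/gc/pauses:seconds" // metric name added by this patch
	samples := []metrics.Sample{{Name: name}}

	runtime.GC() // force at least one cycle so the histogram has data
	metrics.Read(samples)

	if samples[0].Value.Kind() != metrics.KindFloat64Histogram {
		fmt.Println("metric not supported by this toolchain")
		return
	}
	h := samples[0].Value.Float64Histogram()
	var pauses uint64
	for _, c := range h.Counts {
		pauses += c
	}
	fmt.Printf("%s: %d individual pauses recorded\n", name, pauses)
}

The consistency check added to metrics_test.go below works the same way: since each cycle has at least two stop-the-world pauses, the histogram's total count should be at least twice the reported GC count.
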
Change-Id: Ibb0425c9c582ae3da3b2662d5bbe830d7df9079c Reviewed-on: https://go-review.googlesource.com/c/go/+/247047 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 9 +++++++++ src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 3 +++ src/runtime/metrics_test.go | 22 ++++++++++++++++++++++ src/runtime/mgc.go | 3 +++ src/runtime/mstats.go | 12 ++++++++++++ 6 files changed, 54 insertions(+) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 2be38ccaaa..0e391472b2 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -102,6 +102,15 @@ func initMetrics() { out.scalar = in.heapStats.numObjects }, }, + "/gc/pauses:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(timeHistBuckets) + hist.counts[len(hist.counts)-1] = atomic.Load64(&memstats.gcPauseDist.overflow) + for i := range hist.buckets { + hist.counts[i] = atomic.Load64(&memstats.gcPauseDist.counts[i]) + } + }, + }, "/memory/classes/heap/free:bytes": { deps: makeStatDepSet(heapStatsDep), compute: func(in *statAggregate, out *metricValue) { diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index e43904fc7d..47959e467c 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -88,6 +88,11 @@ var allDesc = []Description{ Description: "Number of objects, live or unswept, occupying heap memory.", Kind: KindUint64, }, + { + Name: "/gc/pauses:seconds", + Description: "Distribution individual GC-related stop-the-world pause latencies.", + Kind: KindFloat64Histogram, + }, { Name: "/memory/classes/heap/free:bytes", Description: "Memory that is available for allocation, and may be returned to the underlying system.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 5045a5b4c1..1e12ade5a1 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -65,6 +65,9 @@ Supported metrics /gc/heap/objects:objects Number of objects, live or unswept, occupying heap memory. + /gc/pauses:seconds + Distribution individual GC-related stop-the-world pause latencies. + /memory/classes/heap/free:bytes Memory that is available for allocation, and may be returned to the underlying system. diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 1a30810544..7b3132bc30 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -90,6 +90,11 @@ func TestReadMetricsConsistency(t *testing.T) { // things (e.g. allocating) so what we read can't reasonably compared // to runtime values. + // Run a few GC cycles to get some of the stats to be non-zero. + runtime.GC() + runtime.GC() + runtime.GC() + // Read all the supported metrics through the metrics package. 
descs, samples := prepareAllMetricsSamples() metrics.Read(samples) @@ -102,6 +107,10 @@ func TestReadMetricsConsistency(t *testing.T) { alloc, free *metrics.Float64Histogram total uint64 } + var gc struct { + numGC uint64 + pauses uint64 + } for i := range samples { kind := samples[i].Value.Kind() if want := descs[samples[i].Name].Kind; kind != want { @@ -128,6 +137,14 @@ func TestReadMetricsConsistency(t *testing.T) { objects.alloc = samples[i].Value.Float64Histogram() case "/gc/heap/frees-by-size:objects": objects.free = samples[i].Value.Float64Histogram() + case "/gc/cycles:gc-cycles": + gc.numGC = samples[i].Value.Uint64() + case "/gc/pauses:seconds": + h := samples[i].Value.Float64Histogram() + gc.pauses = 0 + for i := range h.Counts { + gc.pauses += h.Counts[i] + } } } if totalVirtual.got != totalVirtual.want { @@ -159,6 +176,11 @@ func TestReadMetricsConsistency(t *testing.T) { } } } + // The current GC has at least 2 pauses per GC. + // Check to see if that value makes sense. + if gc.pauses < gc.numGC*2 { + t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) + } } func BenchmarkReadMetricsLatency(b *testing.B) { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 540c376f1c..b0ab0ae6bb 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1418,6 +1418,7 @@ func gcStart(trigger gcTrigger) { now = startTheWorldWithSema(trace.enabled) work.pauseNS += now - work.pauseStart work.tMark = now + memstats.gcPauseDist.record(now - work.pauseStart) }) // Release the world sema before Gosched() in STW mode @@ -1565,6 +1566,7 @@ top: systemstack(func() { now := startTheWorldWithSema(true) work.pauseNS += now - work.pauseStart + memstats.gcPauseDist.record(now - work.pauseStart) }) semrelease(&worldsema) goto top @@ -1677,6 +1679,7 @@ func gcMarkTermination(nextTriggerRatio float64) { unixNow := sec*1e9 + int64(nsec) work.pauseNS += now - work.pauseStart work.tEnd = now + memstats.gcPauseDist.record(now - work.pauseStart) atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 07f466ec49..e0a417d213 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -157,6 +157,14 @@ type mstats struct { // heapStats is a set of statistics heapStats consistentHeapStats + + _ uint32 // ensure gcPauseDist is aligned + + // gcPauseDist represents the distribution of all GC-related + // application pauses in the runtime. + // + // Each individual pause is counted separately, unlike pause_ns. + gcPauseDist timeHistogram } var memstats mstats @@ -443,6 +451,10 @@ func init() { println(offset) throw("memstats.heapStats not aligned to 8 bytes") } + if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 { + println(offset) + throw("memstats.gcPauseDist not aligned to 8 bytes") + } // Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g. // [3]heapStatsDelta) to be 8-byte aligned. if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 { -- cgit v1.3 From e1faebe7b40c23811a6025ed104d3ce9882f0c3b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 13 Oct 2020 12:39:13 -0400 Subject: runtime: manage gcBgMarkWorkers with a global pool Background mark workers perform per-P marking work. Currently each worker is assigned a P at creation time. 
The worker "attaches" to the P via p.gcBgMarkWorker, making itself (usually) available to findRunnableGCWorker for scheduling GC work. While running gcMarkDone, the worker "detaches" from the P (by clearing p.gcBgMarkWorker), since it may park for other reasons and should not be scheduled by findRunnableGCWorker. Unfortunately, this design is complex and difficult to reason about. We simplify things by changing the design to eliminate the hard P attachment. Rather than workers always performing work from the same P, workers perform work for whichever P they find themselves on. On park, the workers are placed in a pool of free workers, which each P's findRunnableGCWorker can use to run a worker for its P. Now if a worker parks in gcMarkDone, a P may simply use another worker from the pool to complete its own work. The P's GC worker mode is used to communicate the mode to run to the selected worker. It is also used to emit the appropriate worker EvGoStart tracepoint. This is a slight change, as this G may be preempted (e.g., in gcMarkDone). When it is rescheduled, the trace viewer will show it as a normal goroutine again. It is currently a bit difficult to connect to the original worker tracepoint, as the viewer does not display the goid for the original worker (though the data is in the trace file). Change-Id: Id7bd3a364dc18a4d2b1c99c4dc4810fae1293c1b Reviewed-on: https://go-review.googlesource.com/c/go/+/262348 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Trust: Michael Pratt --- src/runtime/mgc.go | 222 ++++++++++++++++++++++++++---------------------- src/runtime/proc.go | 69 +++++++++------ src/runtime/runtime2.go | 19 ++++- src/runtime/trace.go | 2 +- 4 files changed, 182 insertions(+), 130 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b0ab0ae6bb..fabb846a74 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -290,10 +290,14 @@ func setGCPhase(x uint32) { type gcMarkWorkerMode int const ( + // gcMarkWorkerNotWorker indicates that the next scheduled G is not + // starting work and the mode should be ignored. + gcMarkWorkerNotWorker gcMarkWorkerMode = iota + // gcMarkWorkerDedicatedMode indicates that the P of a mark // worker is dedicated to running that mark worker. The mark // worker should run without preemption. - gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota + gcMarkWorkerDedicatedMode // gcMarkWorkerFractionalMode indicates that a P is currently // running the "fractional" mark worker. The fractional worker @@ -313,6 +317,7 @@ const ( // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes // to use in execution traces. var gcMarkWorkerModeStrings = [...]string{ + "Not worker", "GC (dedicated)", "GC (fractional)", "GC (idle)", @@ -708,18 +713,12 @@ func (c *gcControllerState) enlistWorker() { } } -// findRunnableGCWorker returns the background mark worker for _p_ if it +// findRunnableGCWorker returns a background mark worker for _p_ if it // should be run. This must only be called when gcBlackenEnabled != 0. func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { if gcBlackenEnabled == 0 { throw("gcControllerState.findRunnable: blackening not enabled") } - if _p_.gcBgMarkWorker == 0 { - // The mark worker associated with this P is blocked - // performing a mark transition. We can't run it - // because it may be on some other run or wait queue. - return nil - } if !gcMarkWorkAvailable(_p_) { // No work to be done right now. 
This can happen at @@ -729,15 +728,35 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return nil } + // Grab a worker before we commit to running below. + node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node == nil { + // There is at least one worker per P, so normally there are + // enough workers to run on all Ps, if necessary. However, once + // a worker enters gcMarkDone it may park without rejoining the + // pool, thus freeing a P with no corresponding worker. + // gcMarkDone never depends on another worker doing work, so it + // is safe to simply do nothing here. + // + // If gcMarkDone bails out without completing the mark phase, + // it will always do so with queued global work. Thus, that P + // will be immediately eligible to re-run the worker G it was + // just using, ensuring work can complete. + return nil + } + decIfPositive := func(ptr *int64) bool { - if *ptr > 0 { - if atomic.Xaddint64(ptr, -1) >= 0 { + for { + v := atomic.Loadint64(ptr) + if v <= 0 { + return false + } + + // TODO: having atomic.Casint64 would be more pleasant. + if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) { return true } - // We lost a race - atomic.Xaddint64(ptr, +1) } - return false } if decIfPositive(&c.dedicatedMarkWorkersNeeded) { @@ -746,6 +765,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode } else if c.fractionalUtilizationGoal == 0 { // No need for fractional workers. + gcBgMarkWorkerPool.push(&node.node) return nil } else { // Is this P behind on the fractional utilization @@ -755,14 +775,15 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { delta := nanotime() - gcController.markStartTime if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { // Nope. No need to run a fractional worker. + gcBgMarkWorkerPool.push(&node.node) return nil } // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } - // Run the background mark worker - gp := _p_.gcBgMarkWorker.ptr() + // Run the background mark worker. + gp := node.gp.ptr() casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) @@ -1796,19 +1817,21 @@ func gcMarkTermination(nextTriggerRatio float64) { } } -// gcBgMarkStartWorkers prepares background mark worker goroutines. -// These goroutines will not run until the mark phase, but they must -// be started while the work is not stopped and from a regular G -// stack. The caller must hold worldsema. +// gcBgMarkStartWorkers prepares background mark worker goroutines. These +// goroutines will not run until the mark phase, but they must be started while +// the work is not stopped and from a regular G stack. The caller must hold +// worldsema. func gcBgMarkStartWorkers() { - // Background marking is performed by per-P G's. Ensure that - // each P has a background GC G. - for _, p := range allp { - if p.gcBgMarkWorker == 0 { - go gcBgMarkWorker(p) - notetsleepg(&work.bgMarkReady, -1) - noteclear(&work.bgMarkReady) - } + // Background marking is performed by per-P G's. Ensure that each P has + // a background GC G. + // + // Worker Gs don't exit if gomaxprocs is reduced. If it is raised + // again, we can reuse the old workers; no need to create new workers. 
+ for gcBgMarkWorkerCount < gomaxprocs { + go gcBgMarkWorker() + notetsleepg(&work.bgMarkReady, -1) + noteclear(&work.bgMarkReady) + gcBgMarkWorkerCount++ } } @@ -1828,82 +1851,81 @@ func gcBgMarkPrepare() { work.nwait = ^uint32(0) } -func gcBgMarkWorker(_p_ *p) { +// gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single +// gcBgMarkWorker goroutine. +type gcBgMarkWorkerNode struct { + // Unused workers are managed in a lock-free stack. This field must be first. + node lfnode + + // The g of this worker. + gp guintptr + + // Release this m on park. This is used to communicate with the unlock + // function, which cannot access the G's stack. It is unused outside of + // gcBgMarkWorker(). + m muintptr +} + +func gcBgMarkWorker() { gp := getg() - type parkInfo struct { - m muintptr // Release this m on park. - attach puintptr // If non-nil, attach to this p on park. - } - // We pass park to a gopark unlock function, so it can't be on + // We pass node to a gopark unlock function, so it can't be on // the stack (see gopark). Prevent deadlock from recursively // starting GC by disabling preemption. gp.m.preemptoff = "GC worker init" - park := new(parkInfo) + node := new(gcBgMarkWorkerNode) gp.m.preemptoff = "" - park.m.set(acquirem()) - park.attach.set(_p_) - // Inform gcBgMarkStartWorkers that this worker is ready. - // After this point, the background mark worker is scheduled - // cooperatively by gcController.findRunnable. Hence, it must - // never be preempted, as this would put it into _Grunnable - // and put it on a run queue. Instead, when the preempt flag - // is set, this puts itself into _Gwaiting to be woken up by - // gcController.findRunnable at the appropriate time. + node.gp.set(gp) + node.m.set(acquirem()) + // Inform gcBgMarkStartWorkers that this worker is ready. After this + // point, the background mark worker is scheduled cooperatively by + // gcController.findRunnableGCWorker. Hence, it must never be + // preempted, as this would put it into _Grunnable and put it on a run + // queue. Instead, when the preempt flag is set, this puts itself into + // _Gwaiting to be woken up by gcController.findRunnableGCWorker at the + // appropriate time. notewakeup(&work.bgMarkReady) for { - // Go to sleep until woken by gcController.findRunnable. - // We can't releasem yet since even the call to gopark - // may be preempted. - gopark(func(g *g, parkp unsafe.Pointer) bool { - park := (*parkInfo)(parkp) + // Go to sleep until woken by + // gcController.findRunnableGCWorker. We can't releasem yet + // since even the call to gopark may be preempted. + gopark(func(g *g, nodep unsafe.Pointer) bool { + node := (*gcBgMarkWorkerNode)(nodep) // The worker G is no longer running, so it's // now safe to allow preemption. - releasem(park.m.ptr()) - - // If the worker isn't attached to its P, - // attach now. During initialization and after - // a phase change, the worker may have been - // running on a different P. As soon as we - // attach, the owner P may schedule the - // worker, so this must be done after the G is - // stopped. - if park.attach != 0 { - p := park.attach.ptr() - park.attach.set(nil) - // cas the worker because we may be - // racing with a new worker starting - // on this P. - if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) { - // The P got a new worker. - // Exit this worker. - return false - } - } + releasem(node.m.ptr()) + + // Release this G to the pool. 
+ gcBgMarkWorkerPool.push(&node.node) + // Note that at this point, the G may immediately be + // rescheduled and may be running. + return true - }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) + }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) - // Loop until the P dies and disassociates this - // worker (the P may later be reused, in which case - // it will get a new worker) or we failed to associate. - if _p_.gcBgMarkWorker.ptr() != gp { - break - } + // Preemption must not occur here, or another G might see + // p.gcMarkWorkerMode. // Disable preemption so we can use the gcw. If the // scheduler wants to preempt us, we'll stop draining, // dispose the gcw, and then preempt. - park.m.set(acquirem()) + node.m.set(acquirem()) + pp := gp.m.p.ptr() // P can't change with preemption disabled. if gcBlackenEnabled == 0 { + println("worker mode", pp.gcMarkWorkerMode) throw("gcBgMarkWorker: blackening not enabled") } + if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker { + throw("gcBgMarkWorker: mode not set") + } + startTime := nanotime() - _p_.gcMarkWorkerStartTime = startTime + pp.gcMarkWorkerStartTime = startTime decnwait := atomic.Xadd(&work.nwait, -1) if decnwait == work.nproc { @@ -1920,11 +1942,11 @@ func gcBgMarkWorker(_p_ *p) { // disabled for mark workers, so it is safe to // read from the G stack. casgstatus(gp, _Grunning, _Gwaiting) - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { default: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") case gcMarkWorkerDedicatedMode: - gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) if gp.preempt { // We were preempted. This is // a useful signal to kick @@ -1933,7 +1955,7 @@ func gcBgMarkWorker(_p_ *p) { // somewhere else. lock(&sched.lock) for { - gp, _ := runqget(_p_) + gp, _ := runqget(pp) if gp == nil { break } @@ -1943,24 +1965,24 @@ func gcBgMarkWorker(_p_ *p) { } // Go back to draining, this time // without preemption. - gcDrain(&_p_.gcw, gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: - gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: - gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) } casgstatus(gp, _Gwaiting, _Grunning) }) // Account for time. duration := nanotime() - startTime - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { case gcMarkWorkerDedicatedMode: atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) + atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } @@ -1969,31 +1991,31 @@ func gcBgMarkWorker(_p_ *p) { // of work? incnwait := atomic.Xadd(&work.nwait, +1) if incnwait > work.nproc { - println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode, + println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, "work.nwait=", incnwait, "work.nproc=", work.nproc) throw("work.nwait > work.nproc") } + // We'll releasem after this point and thus this P may run + // something else. 
We must clear the worker mode to avoid + // attributing the mode to a different (non-worker) G in + // traceGoStart. + pp.gcMarkWorkerMode = gcMarkWorkerNotWorker + // If this worker reached a background mark completion // point, signal the main GC goroutine. if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // Make this G preemptible and disassociate it - // as the worker for this P so - // findRunnableGCWorker doesn't try to - // schedule it. - _p_.gcBgMarkWorker.set(nil) - releasem(park.m.ptr()) + // Make this G preemptible since we are done with per-P + // work. + releasem(node.m.ptr()) gcMarkDone() - // Disable preemption and prepare to reattach - // to the P. + // Disable preemption and prepare to park. // - // We may be running on a different P at this - // point, so we can't reattach until this G is - // parked. - park.m.set(acquirem()) - park.attach.set(_p_) + // Note that we may be running on a different P at this + // point, so we can't use pp. + node.m.set(acquirem()) } } } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 64c891d007..c97f4820da 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2554,14 +2554,17 @@ stop: // We have nothing to do. If we're in the GC mark phase, can // safely scan and blacken objects, and have work to do, run // idle-time marking rather than give up the P. - if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) { - _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode - gp := _p_.gcBgMarkWorker.ptr() - casgstatus(gp, _Gwaiting, _Grunnable) - if trace.enabled { - traceGoUnpark(gp, 0) + if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) { + node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node != nil { + _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode + gp := node.gp.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false } - return gp, false } delta := int64(-1) @@ -2681,12 +2684,33 @@ stop: } // Check for idle-priority GC work again. - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) { + // + // N.B. Since we have no P, gcBlackenEnabled may change at any time; we + // must check again after acquiring a P. + if atomic.Load(&gcBlackenEnabled) != 0 && gcMarkWorkAvailable(nil) { + // Work is available; we can start an idle GC worker only if + // there is an available P and available worker G. + // + // We can attempt to acquire these in either order. Workers are + // almost always available (see comment in findRunnableGCWorker + // for the one case there may be none). Since we're slightly + // less likely to find a P, check for that first. lock(&sched.lock) + var node *gcBgMarkWorkerNode _p_ = pidleget() - if _p_ != nil && _p_.gcBgMarkWorker == 0 { - pidleput(_p_) - _p_ = nil + if _p_ != nil { + // Now that we own a P, gcBlackenEnabled can't change + // (as it requires STW). + if gcBlackenEnabled != 0 { + node = (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node == nil { + pidleput(_p_) + _p_ = nil + } + } else { + pidleput(_p_) + _p_ = nil + } } unlock(&sched.lock) if _p_ != nil { @@ -2695,8 +2719,15 @@ stop: _g_.m.spinning = true atomic.Xadd(&sched.nmspinning, 1) } - // Go back to idle GC check. - goto stop + + // Run the idle worker. 
+ _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode + gp := node.gp.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false } } @@ -4547,18 +4578,6 @@ func (pp *p) destroy() { unlock(&pp.timersLock) unlock(&plocal.timersLock) } - // If there's a background worker, make it runnable and put - // it on the global queue so it can clean itself up. - if gp := pp.gcBgMarkWorker.ptr(); gp != nil { - casgstatus(gp, _Gwaiting, _Grunnable) - if trace.enabled { - traceGoUnpark(gp, 0) - } - globrunqput(gp) - // This assignment doesn't race because the - // world is stopped. - pp.gcBgMarkWorker.set(nil) - } // Flush p's write barrier buffer. if gcphase != _GCoff { wbBufFlush1(pp) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 2dbc0efca3..82fedd804b 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -656,11 +656,15 @@ type p struct { // Per-P GC state gcAssistTime int64 // Nanoseconds in assistAlloc gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) - gcBgMarkWorker guintptr // (atomic) - gcMarkWorkerMode gcMarkWorkerMode - // gcMarkWorkerStartTime is the nanotime() at which this mark - // worker started. + // gcMarkWorkerMode is the mode for the next mark worker to run in. + // That is, this is used to communicate with the worker goroutine + // selected for immediate execution by + // gcController.findRunnableGCWorker. When scheduling other goroutines, + // this field must be set to gcMarkWorkerNotWorker. + gcMarkWorkerMode gcMarkWorkerMode + // gcMarkWorkerStartTime is the nanotime() at which the most recent + // mark worker started. gcMarkWorkerStartTime int64 // gcw is this P's GC work buffer cache. The work buffer is @@ -1073,6 +1077,13 @@ var ( // must be atomic. Length may change at safe points. timerpMask pMask + // Pool of GC parked background workers. Entries are type + // *gcBgMarkWorkerNode. + gcBgMarkWorkerPool lfstack + + // Total number of gcBgMarkWorker goroutines. Protected by worldsema. + gcBgMarkWorkerCount int32 + // Information about what cpu features are available. // Packages outside the runtime should not use these // as they are not an external api. diff --git a/src/runtime/trace.go b/src/runtime/trace.go index d3ecd148be..bcd0b9d56c 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -1064,7 +1064,7 @@ func traceGoStart() { _g_ := getg().m.curg _p_ := _g_.m.p _g_.traceseq++ - if _g_ == _p_.ptr().gcBgMarkWorker.ptr() { + if _p_.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker { traceEvent(traceEvGoStartLabel, -1, uint64(_g_.goid), _g_.traceseq, trace.markWorkerLabels[_p_.ptr().gcMarkWorkerMode]) } else if _g_.tracelastp == _p_ { traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid)) -- cgit v1.3 From 256d729c0b272021a44f61f47cd2c9c6d9fb1722 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 13 Oct 2020 18:11:26 -0400 Subject: runtime: simplify gcBgMarkWorker preemption gcBgMarkWorker G's are primarily scheduled by findRunnableGCWorker, but that no longer needs to be strictly enforced. Temporary preemption to a runq is fine when the P is not in use. We still releasem in gopark in the normal case for efficiency: if gcDrain stops because gp.preempt is set, then gopark would always preempt. That is fine, but inefficient, since it will reschedule simply to park again. Thus, we keep releasem in unlockf to skip this extra cycle. 
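To illustrate the park/wake shape this relies on outside the runtime, the sketch below uses ordinary channels as stand-ins for gcBgMarkWorkerPool and findRunnableGCWorker; the names, modes, and channel-based pool are illustrative assumptions, not runtime code. Workers push themselves onto a free pool and block, and a dispatcher pops a parked worker and tells it which mode to run in, roughly the handshake findRunnableGCWorker performs via p.gcMarkWorkerMode.

package main

import (
	"fmt"
	"sync"
)

// worker parks itself in the pool and waits to be told which mode to run in.
type worker struct {
	id   int
	wake chan string
}

func main() {
	const nWorkers = 4
	pool := make(chan *worker, nWorkers) // stand-in for gcBgMarkWorkerPool

	var wg sync.WaitGroup
	for i := 0; i < nWorkers; i++ {
		w := &worker{id: i, wake: make(chan string)}
		wg.Add(1)
		go func(w *worker) {
			defer wg.Done()
			for {
				pool <- w // "park": put ourselves back in the free pool
				mode, ok := <-w.wake
				if !ok {
					return // pool is shutting down
				}
				fmt.Printf("worker %d running in %q mode\n", w.id, mode)
			}
		}(w)
	}

	// Dispatcher: roughly what findRunnableGCWorker does: pop a parked
	// worker (if any) and communicate which mode it should run in.
	for _, mode := range []string{"dedicated", "fractional", "idle"} {
		w := <-pool
		w.wake <- mode
	}

	// Shut down: every worker eventually parks again, so draining the pool
	// and closing each wake channel stops them all.
	for i := 0; i < nWorkers; i++ {
		close((<-pool).wake)
	}
	wg.Wait()
}

A worker that is temporarily preempted in this model simply runs, parks again, and becomes schedulable from the pool, which is why strict enforcement of findRunnableGCWorker-only scheduling is no longer needed.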
Change-Id: I6d1a42e3ca41b76227142a6b5bfb376c9213e3c9 Reviewed-on: https://go-review.googlesource.com/c/go/+/262349 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Trust: Michael Pratt --- src/runtime/mgc.go | 67 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index fabb846a74..9d2682f03c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1829,8 +1829,12 @@ func gcBgMarkStartWorkers() { // again, we can reuse the old workers; no need to create new workers. for gcBgMarkWorkerCount < gomaxprocs { go gcBgMarkWorker() + notetsleepg(&work.bgMarkReady, -1) noteclear(&work.bgMarkReady) + // The worker is now guaranteed to be added to the pool before + // its P's next findRunnableGCWorker. + gcBgMarkWorkerCount++ } } @@ -1877,32 +1881,55 @@ func gcBgMarkWorker() { gp.m.preemptoff = "" node.gp.set(gp) + node.m.set(acquirem()) - // Inform gcBgMarkStartWorkers that this worker is ready. After this - // point, the background mark worker is scheduled cooperatively by - // gcController.findRunnableGCWorker. Hence, it must never be - // preempted, as this would put it into _Grunnable and put it on a run - // queue. Instead, when the preempt flag is set, this puts itself into - // _Gwaiting to be woken up by gcController.findRunnableGCWorker at the - // appropriate time. notewakeup(&work.bgMarkReady) + // After this point, the background mark worker is generally scheduled + // cooperatively by gcController.findRunnableGCWorker. While performing + // work on the P, preemption is disabled because we are working on + // P-local work buffers. When the preempt flag is set, this puts itself + // into _Gwaiting to be woken up by gcController.findRunnableGCWorker + // at the appropriate time. + // + // When preemption is enabled (e.g., while in gcMarkDone), this worker + // may be preempted and schedule as a _Grunnable G from a runq. That is + // fine; it will eventually gopark again for further scheduling via + // findRunnableGCWorker. + // + // Since we disable preemption before notifying bgMarkReady, we + // guarantee that this G will be in the worker pool for the next + // findRunnableGCWorker. This isn't strictly necessary, but it reduces + // latency between _GCmark starting and the workers starting. for { // Go to sleep until woken by - // gcController.findRunnableGCWorker. We can't releasem yet - // since even the call to gopark may be preempted. + // gcController.findRunnableGCWorker. gopark(func(g *g, nodep unsafe.Pointer) bool { node := (*gcBgMarkWorkerNode)(nodep) - // The worker G is no longer running, so it's - // now safe to allow preemption. - releasem(node.m.ptr()) + if mp := node.m.ptr(); mp != nil { + // The worker G is no longer running; release + // the M. + // + // N.B. it is _safe_ to release the M as soon + // as we are no longer performing P-local mark + // work. + // + // However, since we cooperatively stop work + // when gp.preempt is set, if we releasem in + // the loop then the following call to gopark + // would immediately preempt the G. This is + // also safe, but inefficient: the G must + // schedule again only to enter gopark and park + // again. Thus, we defer the release until + // after parking the G. + releasem(mp) + } // Release this G to the pool. gcBgMarkWorkerPool.push(&node.node) // Note that at this point, the G may immediately be // rescheduled and may be running. 
- return true }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) @@ -1913,7 +1940,7 @@ func gcBgMarkWorker() { // scheduler wants to preempt us, we'll stop draining, // dispose the gcw, and then preempt. node.m.set(acquirem()) - pp := gp.m.p.ptr() // P can't change with preemption disabled. + pp := gp.m.p.ptr() // P can't change with preemption disabled. if gcBlackenEnabled == 0 { println("worker mode", pp.gcMarkWorkerMode) @@ -2005,17 +2032,13 @@ func gcBgMarkWorker() { // If this worker reached a background mark completion // point, signal the main GC goroutine. if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // Make this G preemptible since we are done with per-P - // work. + // We don't need the P-local buffers here, allow + // preemption becuse we may schedule like a regular + // goroutine in gcMarkDone (block on locks, etc). releasem(node.m.ptr()) + node.m.set(nil) gcMarkDone() - - // Disable preemption and prepare to park. - // - // Note that we may be running on a different P at this - // point, so we can't use pp. - node.m.set(acquirem()) } } } -- cgit v1.3 From 6abbfc17c255c07134a69c3ca305231db80530ec Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 28 Oct 2020 18:06:05 -0400 Subject: runtime: add world-stopped assertions Stopping the world is an implicit lock for many operations, so we should assert the world is stopped in functions that require it. This is enabled along with the rest of lock ranking, though it is a bit orthogonal and likely cheap enough to enable all the time should we choose. Requiring a lock _or_ world stop is common, so that can be expressed as well. Updates #40677 Change-Id: If0a58544f4251d367f73c4120c9d39974c6cd091 Reviewed-on: https://go-review.googlesource.com/c/go/+/248577 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/heapdump.go | 15 +++++++++ src/runtime/lockrank_off.go | 16 +++++++++ src/runtime/lockrank_on.go | 79 +++++++++++++++++++++++++++++++++++++++++++++ src/runtime/mcheckmark.go | 2 ++ src/runtime/mgc.go | 2 ++ src/runtime/mgcmark.go | 4 +++ src/runtime/mgcsweep.go | 2 ++ src/runtime/mstats.go | 14 ++++++-- src/runtime/proc.go | 14 ++++++++ 9 files changed, 145 insertions(+), 3 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 33e224d587..2d531571aa 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -431,6 +431,9 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, o } func dumproots() { + // To protect mheap_.allspans. + assertWorldStopped() + // TODO(mwhudson): dump datamask etc from all objects // data segment dumpint(tagData) @@ -468,6 +471,9 @@ func dumproots() { var freemark [_PageSize / 8]bool func dumpobjs() { + // To protect mheap_.allspans. + assertWorldStopped() + for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { continue @@ -552,6 +558,8 @@ func dumpms() { //go:systemstack func dumpmemstats(m *MemStats) { + assertWorldStopped() + // These ints should be identical to the exported // MemStats structure and should be ordered the same // way too. @@ -634,6 +642,9 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, } func dumpmemprof() { + // To protect mheap_.allspans. 
+ assertWorldStopped() + iterate_memprof(dumpmemprof_callback) for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { @@ -655,6 +666,8 @@ func dumpmemprof() { var dumphdr = []byte("go1.7 heap dump\n") func mdump(m *MemStats) { + assertWorldStopped() + // make sure we're done sweeping for _, s := range mheap_.allspans { if s.state.get() == mSpanInUse { @@ -676,6 +689,8 @@ func mdump(m *MemStats) { } func writeheapdump_m(fd uintptr, m *MemStats) { + assertWorldStopped() + _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) _g_.waitreason = waitReasonDumpingHeap diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go index 40edf882ee..7dcd8f5fe9 100644 --- a/src/runtime/lockrank_off.go +++ b/src/runtime/lockrank_off.go @@ -46,3 +46,19 @@ func assertLockHeld(l *mutex) { //go:nosplit func assertRankHeld(r lockRank) { } + +//go:nosplit +func worldStopped() { +} + +//go:nosplit +func worldStarted() { +} + +//go:nosplit +func assertWorldStopped() { +} + +//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { +} diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go index db7ff23a58..c25b3a4656 100644 --- a/src/runtime/lockrank_on.go +++ b/src/runtime/lockrank_on.go @@ -7,9 +7,14 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) +// worldIsStopped is accessed atomically to track world-stops. 1 == world +// stopped. +var worldIsStopped uint32 + // lockRankStruct is embedded in mutex type lockRankStruct struct { // static lock ranking of the lock @@ -284,3 +289,77 @@ func assertRankHeld(r lockRank) { throw("not holding required lock!") }) } + +// worldStopped notes that the world is stopped. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStopped() { + if stopped := atomic.Xadd(&worldIsStopped, 1); stopped != 1 { + print("world stop count=", stopped, "\n") + throw("recursive world stop") + } +} + +// worldStarted that the world is starting. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStarted() { + if stopped := atomic.Xadd(&worldIsStopped, -1); stopped != 0 { + print("world stop count=", stopped, "\n") + throw("released non-stopped world stop") + } +} + +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func checkWorldStopped() bool { + stopped := atomic.Load(&worldIsStopped) + if stopped > 1 { + print("inconsistent world stop count=", stopped, "\n") + throw("inconsistent world stop count") + } + + return stopped == 1 +} + +// assertWorldStopped throws if the world is not stopped. It does not check +// which M stopped the world. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func assertWorldStopped() { + if checkWorldStopped() { + return + } + + throw("world not stopped") +} + +// assertWorldStoppedOrLockHeld throws if the world is not stopped and the +// passed lock is not held. +// +// nosplit to ensure it can be called in as many contexts as possible. 
+//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { + if checkWorldStopped() { + return + } + + gp := getg() + systemstack(func() { + held := checkLockHeld(gp, l) + if !held { + printlock() + print("caller requires world stop or lock ", l, " (rank ", l.rank.String(), "), holding:\n") + println("") + printHeldLocks(gp) + throw("no world stop or required lock!") + } + }) +} diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index c0b028d715..ba80ac1bdf 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -34,6 +34,8 @@ var useCheckmark = false // // The world must be stopped. func startCheckmarks() { + assertWorldStopped() + // Clear all checkmarks. for _, ai := range mheap_.allArenas { arena := mheap_.arenas[ai.l1()][ai.l2()] diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 9d2682f03c..fb3c149942 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2164,6 +2164,8 @@ func gcMark(start_time int64) { // //go:systemstack func gcSweep(mode gcMode) { + assertWorldStopped() + if gcphase != _GCoff { throw("gcSweep being done but phase is not GCoff") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index c71c0e58d3..5a24cdac88 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -54,6 +54,8 @@ const ( // // The world must be stopped. func gcMarkRootPrepare() { + assertWorldStopped() + work.nFlushCacheRoots = 0 // Compute how many data and BSS root blocks there are. @@ -1535,6 +1537,8 @@ func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) { // // The world must be stopped. func gcMarkTinyAllocs() { + assertWorldStopped() + for _, p := range allp { c := p.mcache if c == nil || c.tiny == 0 { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 9b77ce635c..8391435630 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -123,6 +123,8 @@ func (h *mheap) nextSpanForSweep() *mspan { // //go:nowritebarrier func finishsweep_m() { + assertWorldStopped() + // Sweeping must be complete before marking commences, so // sweep any unswept spans. If this is a concurrent GC, there // shouldn't be any spans left to sweep, so this should finish diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index e0a417d213..3829355d7b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -601,6 +601,8 @@ func readGCStats_m(pauses *[]uint64) { // //go:nowritebarrier func updatememstats() { + assertWorldStopped() + // Flush mcaches to mcentral before doing anything else. // // Flushing to the mcentral may in general cause stats to @@ -706,6 +708,8 @@ func updatememstats() { // //go:nowritebarrier func flushmcache(i int) { + assertWorldStopped() + p := allp[i] c := p.mcache if c == nil { @@ -721,6 +725,8 @@ func flushmcache(i int) { // //go:nowritebarrier func flushallmcaches() { + assertWorldStopped() + for i := 0; i < int(gomaxprocs); i++ { flushmcache(i) } @@ -876,10 +882,10 @@ func (m *consistentHeapStats) release(c *mcache) { // unsafeRead aggregates the delta for this shard into out. // // Unsafe because it does so without any synchronization. The -// only safe time to call this is if the world is stopped or -// we're freezing the world or going down anyway (and we just -// want _some_ estimate). +// world must be stopped. 
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { + assertWorldStopped() + for i := range m.stats { out.merge(&m.stats[i]) } @@ -890,6 +896,8 @@ func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { // Unsafe because the world must be stopped and values should // be donated elsewhere before clearing. func (m *consistentHeapStats) unsafeClear() { + assertWorldStopped() + for i := range m.stats { m.stats[i] = heapStatsDelta{} } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 939757f3a7..82284e6cd6 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -587,6 +587,9 @@ func schedinit() { sched.maxmcount = 10000 + // The world starts stopped. + worldStopped() + moduledataverify() stackinit() mallocinit() @@ -617,6 +620,9 @@ func schedinit() { } unlock(&sched.lock) + // World is effectively started now, as P's can run. + worldStarted() + // For cgocheck > 1, we turn on the write barrier at all times // and check all pointer writes. We can't do this until after // procresize because the write barrier needs a P. @@ -1082,9 +1088,13 @@ func stopTheWorldWithSema() { if bad != "" { throw(bad) } + + worldStopped() } func startTheWorldWithSema(emitTraceEvent bool) int64 { + assertWorldStopped() + mp := acquirem() // disable preemption because it can be holding p in a local var if netpollinited() { list := netpoll(0) // non-blocking @@ -1105,6 +1115,8 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { } unlock(&sched.lock) + worldStarted() + for p1 != nil { p := p1 p1 = p1.link.ptr() @@ -4539,6 +4551,7 @@ func (pp *p) init(id int32) { // sched.lock must be held and the world must be stopped. func (pp *p) destroy() { assertLockHeld(&sched.lock) + assertWorldStopped() // Move all runnable goroutines to the global queue for pp.runqhead != pp.runqtail { @@ -4629,6 +4642,7 @@ func (pp *p) destroy() { // Returns list of Ps with local work, they need to be scheduled by the caller. func procresize(nprocs int32) *p { assertLockHeld(&sched.lock) + assertWorldStopped() old := gomaxprocs if old < 0 || nprocs <= 0 { -- cgit v1.3 From 9393b5bae5944acebed3ab6f995926b7de3ce429 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 21 Aug 2020 11:59:55 -0400 Subject: runtime: add heap lock assertions Some functions that required holding the heap lock _or_ world stop have been simplified to simply requiring the heap lock. This is conceptually simpler and taking the heap lock during world stop is guaranteed to not contend. This was only done on functions already called on the systemstack to avoid too many extra systemstack calls in GC. 
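As a rough, standalone sketch of the fail-fast pattern these assertion changes add (an atomic counter tracking world-stop state, checked on entry to functions that rely on it): the function names and the flushAllCaches stand-in below are illustrative only, not the runtime's, and the real assertLockHeld additionally relies on the lock-ranking bookkeeping, which has no simple standalone analogue.

package main

import (
	"fmt"
	"sync/atomic"
)

// worldIsStopped mirrors the counter pattern from the patch: 1 == stopped.
var worldIsStopped uint32

func worldStop() {
	if n := atomic.AddUint32(&worldIsStopped, 1); n != 1 {
		panic(fmt.Sprintf("recursive world stop, count=%d", n))
	}
}

func worldStart() {
	if n := atomic.AddUint32(&worldIsStopped, ^uint32(0)); n != 0 { // decrement
		panic(fmt.Sprintf("released non-stopped world, count=%d", n))
	}
}

func assertWorldStopped() {
	if atomic.LoadUint32(&worldIsStopped) != 1 {
		panic("world not stopped")
	}
}

// flushAllCaches stands in for any function that is only safe to call while
// the world is stopped (updatememstats, flushallmcaches, etc. in the patch).
func flushAllCaches() {
	assertWorldStopped()
	fmt.Println("flushing caches with the world stopped")
}

func main() {
	worldStop()
	flushAllCaches()
	worldStart()
	// Calling flushAllCaches() here would panic; the assertion turns a
	// subtle misuse into an immediate, diagnosable failure.
}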
Updates #40677 Change-Id: I15aa1dadcdd1a81aac3d2a9ecad6e7d0377befdc Reviewed-on: https://go-review.googlesource.com/c/go/+/250262 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/export_test.go | 61 +++++++++++++++++++++++++++++++++++++++++----- src/runtime/malloc.go | 2 ++ src/runtime/mgc.go | 4 +++ src/runtime/mgcscavenge.go | 18 ++++++++++++++ src/runtime/mheap.go | 29 +++++++++++++++++----- src/runtime/mpagealloc.go | 22 +++++++++++++++++ src/runtime/mpagecache.go | 14 ++++++++++- src/runtime/proc.go | 2 ++ 8 files changed, 139 insertions(+), 13 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 4ca0420d2a..44551dcaf1 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -743,7 +743,16 @@ func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) { return (*pageCache)(c).alloc(npages) } func (c *PageCache) Flush(s *PageAlloc) { - (*pageCache)(c).flush((*pageAlloc)(s)) + cp := (*pageCache)(c) + sp := (*pageAlloc)(s) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(sp.mheapLock) + cp.flush(sp) + unlock(sp.mheapLock) + }) } // Expose chunk index type. @@ -754,13 +763,41 @@ type ChunkIdx chunkIdx type PageAlloc pageAlloc func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) { - return (*pageAlloc)(p).alloc(npages) + pp := (*pageAlloc)(p) + + var addr, scav uintptr + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + addr, scav = pp.alloc(npages) + unlock(pp.mheapLock) + }) + return addr, scav } func (p *PageAlloc) AllocToCache() PageCache { - return PageCache((*pageAlloc)(p).allocToCache()) + pp := (*pageAlloc)(p) + + var c PageCache + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + c = PageCache(pp.allocToCache()) + unlock(pp.mheapLock) + }) + return c } func (p *PageAlloc) Free(base, npages uintptr) { - (*pageAlloc)(p).free(base, npages) + pp := (*pageAlloc)(p) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + pp.free(base, npages) + unlock(pp.mheapLock) + }) } func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end) @@ -768,6 +805,8 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) { pp := (*pageAlloc)(p) systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. lock(pp.mheapLock) r = pp.scavenge(nbytes, mayUnlock) unlock(pp.mheapLock) @@ -926,7 +965,11 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { addr := chunkBase(chunkIdx(i)) // Mark the chunk's existence in the pageAlloc. - p.grow(addr, pallocChunkBytes) + systemstack(func() { + lock(p.mheapLock) + p.grow(addr, pallocChunkBytes) + unlock(p.mheapLock) + }) // Initialize the bitmap and update pageAlloc metadata. chunk := p.chunkOf(chunkIndex(addr)) @@ -957,13 +1000,19 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { } // Update heap metadata for the allocRange calls above. 
- p.update(addr, pallocChunkPages, false, false) + systemstack(func() { + lock(p.mheapLock) + p.update(addr, pallocChunkPages, false, false) + unlock(p.mheapLock) + }) } + systemstack(func() { lock(p.mheapLock) p.scavengeStartGen() unlock(p.mheapLock) }) + return (*PageAlloc)(p) } diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0563f49d17..4b798d129c 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -627,6 +627,8 @@ func mallocinit() { // // h must be locked. func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + assertLockHeld(&h.lock) + n = alignUp(n, heapArenaBytes) // First, try the arena pre-reservation. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index fb3c149942..185d3201ca 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -821,6 +821,8 @@ func pollFractionalWorkerExit() bool { // // mheap_.lock must be held or the world must be stopped. func gcSetTriggerRatio(triggerRatio float64) { + assertWorldStoppedOrLockHeld(&mheap_.lock) + // Compute the next GC goal, which is when the allocated heap // has grown by GOGC/100 over the heap marked by the last // cycle. @@ -960,6 +962,8 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { + assertWorldStoppedOrLockHeld(&mheap_.lock) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 5843ada981..a242577bd9 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -397,6 +397,8 @@ func bgscavenge(c chan int) { // //go:systemstack func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { + assertLockHeld(p.mheapLock) + var ( addrs addrRange gen uint32 @@ -446,6 +448,8 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { // //go:systemstack func (p *pageAlloc) scavengeStartGen() { + assertLockHeld(p.mheapLock) + if debug.scavtrace > 0 { printScavTrace(p.scav.gen, p.scav.released, false) } @@ -495,6 +499,8 @@ func (p *pageAlloc) scavengeStartGen() { // //go:systemstack func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { + assertLockHeld(p.mheapLock) + // Start by reserving the minimum. r := p.scav.inUse.removeLast(p.scav.reservationBytes) @@ -525,6 +531,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { // //go:systemstack func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { + assertLockHeld(p.mheapLock) + if r.size() == 0 || gen != p.scav.gen { return } @@ -552,6 +560,8 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { // //go:systemstack func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) { + assertLockHeld(p.mheapLock) + // Defensively check if we've recieved an empty address range. // If so, just return. if work.size() == 0 { @@ -610,6 +620,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // If we found something, scavenge it and return! if npages != 0 { work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return. 
return uintptr(npages) * pageSize, work } } @@ -674,12 +686,16 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) if npages > 0 { work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return. return uintptr(npages) * pageSize, work } // We were fooled, so let's continue from where we left off. work.limit = offAddr{chunkBase(candidateChunkIdx)} } + + assertLockHeld(p.mheapLock) // Must be locked on return. return 0, work } @@ -692,6 +708,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // // p.mheapLock must be held. func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { + assertLockHeld(p.mheapLock) + p.chunkOf(ci).scavenged.setRange(base, npages) // Compute the full address for the start of the range. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 14a73c0491..66a59cb999 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -483,10 +483,15 @@ func (s *mspan) layout() (size, n, total uintptr) { // indirect call from the fixalloc initializer, the compiler can't see // this. // +// The heap lock must be held. +// //go:nowritebarrierrec func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { h := (*mheap)(vh) s := (*mspan)(p) + + assertLockHeld(&h.lock) + if len(h.allspans) >= cap(h.allspans) { n := 64 * 1024 / sys.PtrSize if n < cap(h.allspans)*3/2 { @@ -721,7 +726,7 @@ func (h *mheap) init() { // // reclaim implements the page-reclaimer half of the sweeper. // -// h must NOT be locked. +// h.lock must NOT be held. func (h *mheap) reclaim(npage uintptr) { // TODO(austin): Half of the time spent freeing spans is in // locking/unlocking the heap (even with low contention). We @@ -804,6 +809,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { // In particular, if a span were freed and merged concurrently // with this probing heapArena.spans, it would be possible to // observe arbitrary, stale span pointers. + assertLockHeld(&h.lock) + n0 := n var nFreed uintptr sg := h.sweepgen @@ -858,6 +865,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { traceGCSweepSpan((n0 - nFreed) * pageSize) lock(&h.lock) } + + assertLockHeld(&h.lock) // Must be locked on return. return nFreed } @@ -1011,7 +1020,7 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) { // tryAllocMSpan attempts to allocate an mspan object from // the P-local cache, but may fail. // -// h need not be locked. +// h.lock need not be held. // // This caller must ensure that its P won't change underneath // it during this function. Currently to ensure that we enforce @@ -1035,7 +1044,7 @@ func (h *mheap) tryAllocMSpan() *mspan { // allocMSpanLocked allocates an mspan object. // -// h must be locked. +// h.lock must be held. // // allocMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1044,6 +1053,8 @@ func (h *mheap) tryAllocMSpan() *mspan { // //go:systemstack func (h *mheap) allocMSpanLocked() *mspan { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() if pp == nil { // We don't have a p so just do the normal thing. @@ -1065,7 +1076,7 @@ func (h *mheap) allocMSpanLocked() *mspan { // freeMSpanLocked free an mspan object. // -// h must be locked. +// h.lock must be held. 
// // freeMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1074,6 +1085,8 @@ func (h *mheap) allocMSpanLocked() *mspan { // //go:systemstack func (h *mheap) freeMSpanLocked(s *mspan) { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() // First try to free the mspan directly to the cache. if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) { @@ -1097,7 +1110,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) { // // The returned span is fully initialized. // -// h must not be locked. +// h.lock must not be held. // // allocSpan must be called on the system stack both because it acquires // the heap lock and because it must block GC transitions. @@ -1281,8 +1294,10 @@ HaveSpan: // Try to add at least npage pages of memory to the heap, // returning whether it worked. // -// h must be locked. +// h.lock must be held. func (h *mheap) grow(npage uintptr) bool { + assertLockHeld(&h.lock) + // We must grow the heap in whole palloc chunks. ask := alignUp(npage, pallocChunkPages) * pageSize @@ -1391,6 +1406,8 @@ func (h *mheap) freeManual(s *mspan, typ spanAllocType) { } func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { + assertLockHeld(&h.lock) + switch s.state.get() { case mSpanManual: if s.allocCount != 0 { diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 2af1c97e0b..dac1f39969 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -349,6 +349,8 @@ func (p *pageAlloc) chunkOf(ci chunkIdx) *pallocData { // // p.mheapLock must be held. func (p *pageAlloc) grow(base, size uintptr) { + assertLockHeld(p.mheapLock) + // Round up to chunks, since we can't deal with increments smaller // than chunks. Also, sysGrow expects aligned values. limit := alignUp(base+size, pallocChunkBytes) @@ -413,6 +415,8 @@ func (p *pageAlloc) grow(base, size uintptr) { // // p.mheapLock must be held. func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { + assertLockHeld(p.mheapLock) + // base, limit, start, and end are inclusive. limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) @@ -499,6 +503,8 @@ func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // // p.mheapLock must be held. func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { + assertLockHeld(p.mheapLock) + limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) si, ei := chunkPageIndex(base), chunkPageIndex(limit) @@ -534,6 +540,8 @@ func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { // // p.mheapLock must be held. func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { + assertLockHeld(p.mheapLock) + // If we're not in a test, validate first by checking mheap_.arenas. // This is a fast path which is only safe to use outside of testing. ai := arenaIndex(addr.addr()) @@ -568,6 +576,8 @@ func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { // // p.mheapLock must be held. func (p *pageAlloc) find(npages uintptr) (uintptr, offAddr) { + assertLockHeld(p.mheapLock) + // Search algorithm. // // This algorithm walks each level l of the radix tree from the root level @@ -786,7 +796,13 @@ nextLevel: // should be ignored. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. 
+// +//go:systemstack func (p *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { @@ -841,7 +857,13 @@ Found: // free returns npages worth of memory starting at base back to the page heap. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) free(base, npages uintptr) { + assertLockHeld(p.mheapLock) + // If we're freeing pages below the p.searchAddr, update searchAddr. if b := (offAddr{base}); b.lessThan(p.searchAddr) { p.searchAddr = b diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go index 5f76501a1c..4b5c66d8d6 100644 --- a/src/runtime/mpagecache.go +++ b/src/runtime/mpagecache.go @@ -71,8 +71,14 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) { // into s. Then, it clears the cache, such that empty returns // true. // -// p.mheapLock must be held or the world must be stopped. +// p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (c *pageCache) flush(p *pageAlloc) { + assertLockHeld(p.mheapLock) + if c.empty() { return } @@ -103,7 +109,13 @@ func (c *pageCache) flush(p *pageAlloc) { // chunk. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) allocToCache() pageCache { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 82284e6cd6..ced27ceb3a 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4603,7 +4603,9 @@ func (pp *p) destroy() { mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) } pp.mspancache.len = 0 + lock(&mheap_.lock) pp.pcache.flush(&mheap_.pages) + unlock(&mheap_.lock) }) freemcache(pp.mcache) pp.mcache = nil -- cgit v1.3
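For reference, the GC pause distribution exported earlier in this series can be observed from user code through the runtime/metrics package (Go 1.16 and later). A minimal sketch, assuming the "/gc/pauses:seconds" metric is available on the running toolchain:

package main

import (
	"fmt"
	"runtime"
	"runtime/metrics"
)

func main() {
	// Run a couple of GC cycles so the pause histogram has data in it.
	runtime.GC()
	runtime.GC()

	samples := []metrics.Sample{{Name: "/gc/pauses:seconds"}}
	metrics.Read(samples)

	if samples[0].Value.Kind() != metrics.KindFloat64Histogram {
		fmt.Println("/gc/pauses:seconds is not supported on this Go version")
		return
	}
	h := samples[0].Value.Float64Histogram()

	var pauses uint64
	for _, c := range h.Counts {
		pauses += c
	}
	// Each GC cycle contributes at least two pauses (sweep termination and
	// mark termination), which is what the updated test above checks.
	fmt.Println("individual GC pauses observed:", pauses)
}

Summing the histogram counts gives the total number of individual pauses, which the updated TestReadMetricsConsistency compares against twice the GC cycle count.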