From 2517f4946b42b8deedb864c884f1b41311d45850 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 14 Oct 2020 17:18:27 -0400 Subject: runtime: remove debugCachedWork debugCachedWork and all of its dependent fields and code were added to aid in debugging issue #27993. Now that the source of the problem is known and mitigated (via the extra work check after STW in gcMarkDone), these extra checks are no longer required and simply make the code more difficult to follow. Remove it all. Updates #27993 Change-Id: I594beedd5ca61733ba9cc9eaad8f80ea92df1a0d Reviewed-on: https://go-review.googlesource.com/c/go/+/262350 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements --- src/runtime/mgc.go | 121 +++++++++++------------------------------------------ 1 file changed, 24 insertions(+), 97 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index bd87144355..0a4d5616a5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1407,19 +1407,6 @@ func gcStart(trigger gcTrigger) { // This is protected by markDoneSema. var gcMarkDoneFlushed uint32 -// debugCachedWork enables extra checks for debugging premature mark -// termination. -// -// For debugging issue #27993. -const debugCachedWork = false - -// gcWorkPauseGen is for debugging the mark completion algorithm. -// gcWork put operations spin while gcWork.pauseGen == gcWorkPauseGen. -// Only used if debugCachedWork is true. -// -// For debugging issue #27993. -var gcWorkPauseGen uint32 = 1 - // gcMarkDone transitions the GC from mark to mark termination if all // reachable objects have been marked (that is, there are no grey // objects and can be no more in the future). Otherwise, it flushes @@ -1475,15 +1462,7 @@ top: // Flush the write barrier buffer, since this may add // work to the gcWork. wbBufFlush1(_p_) - // For debugging, shrink the write barrier - // buffer so it flushes immediately. - // wbBuf.reset will keep it at this size as - // long as throwOnGCWork is set. - if debugCachedWork { - b := &_p_.wbBuf - b.end = uintptr(unsafe.Pointer(&b.buf[wbBufEntryPointers])) - b.debugGen = gcWorkPauseGen - } + // Flush the gcWork, since this may create global work // and set the flushedWork flag. // @@ -1494,29 +1473,12 @@ top: if _p_.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) _p_.gcw.flushedWork = false - } else if debugCachedWork { - // For debugging, freeze the gcWork - // until we know whether we've reached - // completion or not. If we think - // we've reached completion, but - // there's a paused gcWork, then - // that's a bug. - _p_.gcw.pauseGen = gcWorkPauseGen - // Capture the G's stack. - for i := range _p_.gcw.pauseStack { - _p_.gcw.pauseStack[i] = 0 - } - callers(1, _p_.gcw.pauseStack[:]) } }) casgstatus(gp, _Gwaiting, _Grunning) }) if gcMarkDoneFlushed != 0 { - if debugCachedWork { - // Release paused gcWorks. - atomic.Xadd(&gcWorkPauseGen, 1) - } // More grey objects were discovered since the // previous termination check, so there may be more // work to do. Keep going. It's possible the @@ -1526,13 +1488,6 @@ top: goto top } - if debugCachedWork { - throwOnGCWork = true - // Release paused gcWorks. If there are any, they - // should now observe throwOnGCWork and panic. - atomic.Xadd(&gcWorkPauseGen, 1) - } - // There was no global work, no local work, and no Ps // communicated work since we took markDoneSema. Therefore // there are no grey objects and no more objects can be @@ -1549,59 +1504,33 @@ top: // below. 
The important thing is that the wb remains active until // all marking is complete. This includes writes made by the GC. - if debugCachedWork { - // For debugging, double check that no work was added after we - // went around above and disable write barrier buffering. + // There is sometimes work left over when we enter mark termination due + // to write barriers performed after the completion barrier above. + // Detect this and resume concurrent mark. This is obviously + // unfortunate. + // + // See issue #27993 for details. + // + // Switch to the system stack to call wbBufFlush1, though in this case + // it doesn't matter because we're non-preemptible anyway. + restart := false + systemstack(func() { for _, p := range allp { - gcw := &p.gcw - if !gcw.empty() { - printlock() - print("runtime: P ", p.id, " flushedWork ", gcw.flushedWork) - if gcw.wbuf1 == nil { - print(" wbuf1=") - } else { - print(" wbuf1.n=", gcw.wbuf1.nobj) - } - if gcw.wbuf2 == nil { - print(" wbuf2=") - } else { - print(" wbuf2.n=", gcw.wbuf2.nobj) - } - print("\n") - if gcw.pauseGen == gcw.putGen { - println("runtime: checkPut already failed at this generation") - } - throw("throwOnGCWork") + wbBufFlush1(p) + if !p.gcw.empty() { + restart = true + break } } - } else { - // For unknown reasons (see issue #27993), there is - // sometimes work left over when we enter mark - // termination. Detect this and resume concurrent - // mark. This is obviously unfortunate. - // - // Switch to the system stack to call wbBufFlush1, - // though in this case it doesn't matter because we're - // non-preemptible anyway. - restart := false + }) + if restart { + getg().m.preemptoff = "" systemstack(func() { - for _, p := range allp { - wbBufFlush1(p) - if !p.gcw.empty() { - restart = true - break - } - } + now := startTheWorldWithSema(true) + work.pauseNS += now - work.pauseStart }) - if restart { - getg().m.preemptoff = "" - systemstack(func() { - now := startTheWorldWithSema(true) - work.pauseNS += now - work.pauseStart - }) - semrelease(&worldsema) - goto top - } + semrelease(&worldsema) + goto top } // Disable assists and background workers. We must do @@ -2085,7 +2014,7 @@ func gcMark(start_time int64) { // ensured all reachable objects were marked, all of // these must be pointers to black objects. Hence we // can just discard the write barrier buffer. - if debug.gccheckmark > 0 || throwOnGCWork { + if debug.gccheckmark > 0 { // For debugging, flush the buffer and make // sure it really was all marked. wbBufFlush1(p) @@ -2117,8 +2046,6 @@ func gcMark(start_time int64) { gcw.dispose() } - throwOnGCWork = false - cachestats() // Update the marked heap stat. -- cgit v1.3 From e313fd7448ed0dabf98dc725bee2361e905f208b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Thu, 9 Jul 2020 16:20:48 -0400 Subject: runtime: drop unused work.ndone field This field is unused since golang.org/cl/134785 and thus can be trivially removed. Change-Id: I1a87f8e78ffdf662440409404f0251c40bc56a4f Reviewed-on: https://go-review.googlesource.com/c/go/+/241741 Trust: Michael Pratt Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 1 - 1 file changed, 1 deletion(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 0a4d5616a5..65ac654b14 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -983,7 +983,6 @@ var work struct { nproc uint32 tstart int64 nwait uint32 - ndone uint32 // Number of roots of various root types. Set by gcMarkRootPrepare. 
nFlushCacheRoots int -- cgit v1.3 From db185e543fe471c522790b7d93291e786dc54a84 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 7 Jul 2020 17:55:40 -0400 Subject: runtime: drop redundant gcBlackenEnabled reset This reset of gcBlackenEnabled is a no-op because it was already reset almost immediately before in gcMarkDone, which is the only caller of gcMarkTermination. Adjust the comment to clarify setGCPhase a bit more. We are coming from _GCmark, so write barriers are already enabled. Change-Id: Ieac2dadf33c3c5a44e8a25a499dea8cfe03b8d73 Reviewed-on: https://go-review.googlesource.com/c/go/+/241357 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Trust: Michael Pratt Reviewed-by: Michael Knyszek --- src/runtime/mgc.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 65ac654b14..c42c7fbd29 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1558,10 +1558,10 @@ top: gcMarkTermination(nextTriggerRatio) } +// World must be stopped and mark assists and background workers must be +// disabled. func gcMarkTermination(nextTriggerRatio float64) { - // World is stopped. - // Start marktermination which includes enabling the write barrier. - atomic.Store(&gcBlackenEnabled, 0) + // Start marktermination (write barrier remains enabled for now). setGCPhase(_GCmarktermination) work.heap1 = memstats.heap_live -- cgit v1.3 From 8cc280aa727bc7159adfdd083861472aa3066a35 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:13:49 +0000 Subject: runtime: define and enforce synchronization on heap_scan Currently heap_scan is mostly protected by the heap lock, but gcControllerState.revise sometimes accesses it without a lock. In an effort to make gcControllerState.revise callable from more contexts (and have its synchronization guarantees actually respected), make heap_scan atomically read from and written to, unless the world is stopped. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. Change-Id: Iddbbeb954767c704c2bd1d221f36e6c4fc9948a6 Reviewed-on: https://go-review.googlesource.com/c/go/+/246960 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Emmanuel Odeke Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- src/runtime/mheap.go | 4 ++-- src/runtime/mstats.go | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c42c7fbd29..94539dd770 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -494,6 +494,7 @@ func (c *gcControllerState) revise() { gcpercent = 100000 } live := atomic.Load64(&memstats.heap_live) + scan := atomic.Load64(&memstats.heap_scan) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -508,7 +509,7 @@ func (c *gcControllerState) revise() { // // (This is a float calculation to avoid overflowing on // 100*heap_scan.) 
- scanWorkExpected := int64(float64(memstats.heap_scan) * 100 / float64(100+gcpercent)) + scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) if live > memstats.next_gc || c.scanWork > scanWorkExpected { // We're past the soft goal, or we've already done more scan @@ -518,7 +519,7 @@ func (c *gcControllerState) revise() { heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) // Compute the upper bound on the scan work remaining. - scanWorkExpected = int64(memstats.heap_scan) + scanWorkExpected = int64(scan) } // Compute the remaining scan work estimate. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1a57bcd66e..124bbacd1d 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1168,7 +1168,7 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS throw("mheap.allocSpan called with no P") } } - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 @@ -1375,7 +1375,7 @@ func (h *mheap) freeSpan(s *mspan) { systemstack(func() { c := getg().m.p.ptr().mcache lock(&h.lock) - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index b95b332134..2c217ecf84 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -139,6 +139,8 @@ type mstats struct { // no-scan objects and no-scan tails of objects. // // Whenever this is updated, call gcController.revise(). + // + // Read and written atomically or with the world stopped. heap_scan uint64 // heap_marked is the number of bytes marked by the previous @@ -635,7 +637,7 @@ func flushallmcaches() { func purgecachedstats(c *mcache) { // Protected by either heap or GC lock. h := &mheap_ - memstats.heap_scan += uint64(c.local_scan) + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) c.local_scan = 0 memstats.tinyallocs += uint64(c.local_tinyallocs) c.local_tinyallocs = 0 -- cgit v1.3 From 93d7d1685ee9e9f296e20f6c712796e54602e891 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:17:40 +0000 Subject: runtime: load gcControllerState.scanWork atomically in revise gcControllerState.scanWork's docs state that it must be accessed atomically during a GC cycle, but gcControllerState.revise does not do this (even when called with the heap lock held). This change makes it so that gcControllerState.revise accesses scanWork atomically and explicitly. Note that we don't update gcControllerState.revise's erroneous doc comment here because this change isn't about revise's guarantees, just about heap_scan. The comment is updated in a later change. 
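Taken together, this change and the previous one establish a single pattern in the pacer: snapshot every shared statistic with an atomic load first, then do all the floating-point arithmetic on the local copies. The standalone sketch below illustrates that shape only and is not the runtime's code; pacerState, its field names, and the 1000-unit floor are stand-ins for gcControllerState, heap_scan, scanWork, and the real revise logic.

package main

import (
	"fmt"
	"sync/atomic"
)

// pacerState stands in for the parts of gcControllerState and memstats that
// revise reads; both counters are updated atomically elsewhere.
type pacerState struct {
	heapScan  uint64 // bytes of scannable heap allocated so far
	scanWork  int64  // scan work completed in the current cycle
	gcPercent int64
}

// remainingScanWork mirrors the shape of revise: snapshot the racy inputs
// with atomic loads, then compute on the local copies.
func (p *pacerState) remainingScanWork() int64 {
	scan := atomic.LoadUint64(&p.heapScan)
	done := atomic.LoadInt64(&p.scanWork)

	// Float math avoids overflowing 100*heapScan on very large heaps.
	expected := int64(float64(scan) * 100 / float64(100+p.gcPercent))

	remaining := expected - done
	if remaining < 1000 {
		remaining = 1000 // arbitrary lower bound, as in the real pacer
	}
	return remaining
}

func main() {
	p := &pacerState{gcPercent: 100}
	atomic.StoreUint64(&p.heapScan, 64<<20)
	atomic.StoreInt64(&p.scanWork, 8<<20)
	fmt.Println("remaining scan work:", p.remainingScanWork())
}
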
Change-Id: Iafc3ad214e517190bfd8a219896d23da19f7659d Reviewed-on: https://go-review.googlesource.com/c/go/+/246961 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 94539dd770..4b9a6da3b3 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -495,6 +495,7 @@ func (c *gcControllerState) revise() { } live := atomic.Load64(&memstats.heap_live) scan := atomic.Load64(&memstats.heap_scan) + work := atomic.Loadint64(&c.scanWork) // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. @@ -511,7 +512,7 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || c.scanWork > scanWorkExpected { + if live > memstats.next_gc || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. @@ -529,7 +530,7 @@ func (c *gcControllerState) revise() { // (scanWork), so allocation will change this difference // slowly in the soft regime and not at all in the hard // regime. - scanWorkRemaining := scanWorkExpected - c.scanWork + scanWorkRemaining := scanWorkExpected - work if scanWorkRemaining < 1000 { // We set a somewhat arbitrary lower bound on // remaining scan work since if we aim a little high, -- cgit v1.3 From f5c6875f3228951afa1fcf2ec01c614e0fb7e2dd Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:24:56 +0000 Subject: runtime: make next_gc atomically accessed next_gc is mostly updated only during a STW, but may occasionally be updated by calls to e.g. debug.SetGCPercent. In this case the update is supposed to be protected by the heap lock, but in reality it's accessed by gcController.revise which may be called without the heap lock held (despite its documentation, which will be updated in a later change). Change the synchronization policy on next_gc so that it's atomically accessed when the world is not stopped to aid in making revise safe for concurrent use. Change-Id: I79657a72f91563f3241aaeda66e8a7757d399529 Reviewed-on: https://go-review.googlesource.com/c/go/+/246962 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 13 +++++++------ src/runtime/mgcscavenge.go | 2 +- src/runtime/mstats.go | 10 ++++++++-- src/runtime/trace.go | 5 +++-- 4 files changed, 19 insertions(+), 11 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 4b9a6da3b3..5c565a5853 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -409,7 +409,8 @@ type gcControllerState struct { } // startCycle resets the GC controller's state and computes estimates -// for a new GC cycle. The caller must hold worldsema. +// for a new GC cycle. The caller must hold worldsema and the world +// must be stopped. func (c *gcControllerState) startCycle() { c.scanWork = 0 c.bgScanCredit = 0 @@ -499,7 +500,7 @@ func (c *gcControllerState) revise() { // Assume we're under the soft goal. Pace GC to complete at // next_gc assuming the heap is in steady-state. 
- heapGoal := int64(memstats.next_gc) + heapGoal := int64(atomic.Load64(&memstats.next_gc)) // Compute the expected scan work remaining. // @@ -512,12 +513,12 @@ func (c *gcControllerState) revise() { // 100*heap_scan.) scanWorkExpected := int64(float64(scan) * 100 / float64(100+gcpercent)) - if live > memstats.next_gc || work > scanWorkExpected { + if int64(live) > heapGoal || work > scanWorkExpected { // We're past the soft goal, or we've already done more scan // work than we expected. Pace GC so that in the worst case it // will complete by the hard goal. const maxOvershoot = 1.1 - heapGoal = int64(float64(memstats.next_gc) * maxOvershoot) + heapGoal = int64(float64(heapGoal) * maxOvershoot) // Compute the upper bound on the scan work remaining. scanWorkExpected = int64(scan) @@ -846,7 +847,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // Commit to the trigger and goal. memstats.gc_trigger = trigger - memstats.next_gc = goal + atomic.Store64(&memstats.next_gc, goal) if trace.enabled { traceNextGC() } @@ -903,7 +904,7 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { - egogc := float64(memstats.next_gc-memstats.heap_marked) / float64(memstats.heap_marked) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. egogc = 0 diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 34646828e5..6328b295ca 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -123,7 +123,7 @@ func gcPaceScavenger() { return } // Compute our scavenging goal. - goalRatio := float64(memstats.next_gc) / float64(memstats.last_next_gc) + goalRatio := float64(atomic.Load64(&memstats.next_gc)) / float64(memstats.last_next_gc) retainedGoal := uint64(float64(memstats.last_heap_inuse) * goalRatio) // Add retainExtraPercent overhead to retainedGoal. This calculation // looks strange but the purpose is to arrive at an integer division diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 2c217ecf84..8cc20552fb 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -57,9 +57,15 @@ type mstats struct { gc_sys uint64 // updated atomically or during STW other_sys uint64 // updated atomically or during STW - // Statistics about garbage collector. + // Statistics about the garbage collector. + + // next_gc is the goal heap_live for when next GC ends. + // Set to ^uint64(0) if disabled. + // + // Read and written atomically, unless the world is stopped. + next_gc uint64 + // Protected by mheap or stopping the world during GC. - next_gc uint64 // goal heap_live for when next GC ends; ^0 if disabled last_gc_unix uint64 // last gc (in unix time) pause_total_ns uint64 pause_ns [256]uint64 // circular buffer of recent gc pause lengths diff --git a/src/runtime/trace.go b/src/runtime/trace.go index 169b650eb4..d3ecd148be 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -13,6 +13,7 @@ package runtime import ( + "runtime/internal/atomic" "runtime/internal/sys" "unsafe" ) @@ -1146,11 +1147,11 @@ func traceHeapAlloc() { } func traceNextGC() { - if memstats.next_gc == ^uint64(0) { + if nextGC := atomic.Load64(&memstats.next_gc); nextGC == ^uint64(0) { // Heap-based triggering is disabled. 
traceEvent(traceEvNextGC, -1, 0) } else { - traceEvent(traceEvNextGC, -1, memstats.next_gc) + traceEvent(traceEvNextGC, -1, nextGC) } } -- cgit v1.3 From ce46f197b6c75281b77ee93338e2559671e28b01 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 20:48:06 +0000 Subject: runtime: access the assist ratio atomically This change makes it so that the GC assist ratio (the pair of gcControllerState fields assistBytesPerWork and assistWorkPerByte) is updated atomically. Note that the pair of fields are not updated together atomically, but that's OK. The code here was already racy for some time and in practice the assist ratio moves very slowly. The purpose of this change is so that we can document gcController.revise to be safe for concurrent use, which will be useful in further changes. Change-Id: Ie25d630207c88e4f85f2b8953f6a0051ebf1b4ea Reviewed-on: https://go-review.googlesource.com/c/go/+/246963 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mgc.go | 51 ++++++++++++++++++++++++++++++++++++++++++-------- src/runtime/mgcmark.go | 17 +++++++++++------ src/runtime/proc.go | 3 ++- 3 files changed, 56 insertions(+), 15 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 5c565a5853..c54f893689 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -388,10 +388,24 @@ type gcControllerState struct { // bytes that should be performed by mutator assists. This is // computed at the beginning of each cycle and updated every // time heap_scan is updated. - assistWorkPerByte float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + assistWorkPerByte uint64 // assistBytesPerWork is 1/assistWorkPerByte. - assistBytesPerWork float64 + // + // Stored as a uint64, but it's actually a float64. Use + // float64frombits to get the value. + // + // Read and written atomically. + // + // Note that because this is read and written independently + // from assistWorkPerByte users may notice a skew between + // the two values, and such a state should be safe. + assistBytesPerWork uint64 // fractionalUtilizationGoal is the fraction of wall clock // time that should be spent in the fractional mark worker on @@ -470,7 +484,8 @@ func (c *gcControllerState) startCycle() { c.revise() if debug.gcpacertrace > 0 { - print("pacer: assist ratio=", c.assistWorkPerByte, + assistRatio := float64frombits(atomic.Load64(&c.assistWorkPerByte)) + print("pacer: assist ratio=", assistRatio, " (scan ", memstats.heap_scan>>20, " MB in ", work.initialHeapLive>>20, "->", memstats.next_gc>>20, " MB)", @@ -480,9 +495,22 @@ func (c *gcControllerState) startCycle() { } // revise updates the assist ratio during the GC cycle to account for -// improved estimates. This should be called either under STW or -// whenever memstats.heap_scan, memstats.heap_live, or -// memstats.next_gc is updated (with mheap_.lock held). +// improved estimates. This should be called whenever memstats.heap_scan, +// memstats.heap_live, or memstats.next_gc is updated. It is safe to +// call concurrently, but it may race with other calls to revise. +// +// The result of this race is that the two assist ratio values may not line +// up or may be stale. In practice this is OK because the assist ratio +// moves slowly throughout a GC cycle, and the assist ratio is a best-effort +// heuristic anyway. 
Furthermore, no part of the heuristic depends on +// the two assist ratio values being exact reciprocals of one another, since +// the two values are used to convert values from different sources. +// +// The worst case result of this raciness is that we may miss a larger shift +// in the ratio (say, if we decide to pace more aggressively against the +// hard heap goal) but even this "hard goal" is best-effort (see #40460). +// The dedicated GC should ensure we don't exceed the hard goal by too much +// in the rare case we do exceed it. // // It should only be called when gcBlackenEnabled != 0 (because this // is when assists are enabled and the necessary statistics are @@ -555,8 +583,15 @@ func (c *gcControllerState) revise() { // Compute the mutator assist ratio so by the time the mutator // allocates the remaining heap bytes up to next_gc, it will // have done (or stolen) the remaining amount of scan work. - c.assistWorkPerByte = float64(scanWorkRemaining) / float64(heapRemaining) - c.assistBytesPerWork = float64(heapRemaining) / float64(scanWorkRemaining) + // Note that the assist ratio values are updated atomically + // but not together. This means there may be some degree of + // skew between the two values. This is generally OK as the + // values shift relatively slowly over the course of a GC + // cycle. + assistWorkPerByte := float64(scanWorkRemaining) / float64(heapRemaining) + assistBytesPerWork := float64(heapRemaining) / float64(scanWorkRemaining) + atomic.Store64(&c.assistWorkPerByte, float64bits(assistWorkPerByte)) + atomic.Store64(&c.assistBytesPerWork, float64bits(assistBytesPerWork)) } // endCycle computes the trigger ratio for the next cycle. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 79df59d6d6..c71c0e58d3 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -400,11 +400,13 @@ retry: // balance positive. When the required amount of work is low, // we over-assist to build up credit for future allocations // and amortize the cost of assisting. + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) debtBytes := -gp.gcAssistBytes - scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes)) + scanWork := int64(assistWorkPerByte * float64(debtBytes)) if scanWork < gcOverAssistWork { scanWork = gcOverAssistWork - debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork)) + debtBytes = int64(assistBytesPerWork * float64(scanWork)) } // Steal as much credit as we can from the background GC's @@ -418,7 +420,7 @@ retry: if bgScanCredit > 0 { if bgScanCredit < scanWork { stolen = bgScanCredit - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(stolen)) } else { stolen = scanWork gp.gcAssistBytes += debtBytes @@ -543,7 +545,8 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // this scan work counts for. The "1+" is a poor man's // round-up, to ensure this adds credit even if // assistBytesPerWork is very low. - gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone)) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + gp.gcAssistBytes += 1 + int64(assistBytesPerWork*float64(workDone)) // If this is the last worker and we ran out of work, // signal a completion point. 
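The bit-conversion trick used in this patch is easy to lift out on its own. The sketch below is illustrative rather than the runtime's implementation: it wraps a float64 in a small type whose loads and stores go through math.Float64bits and math.Float64frombits over an atomically accessed uint64, which is the same shape as assistWorkPerByte and assistBytesPerWork above.

package main

import (
	"fmt"
	"math"
	"sync/atomic"
)

// atomicFloat64 keeps a float64 as its IEEE-754 bit pattern so that plain
// 8-byte atomics can be used; Go has no atomic float64 primitive.
type atomicFloat64 struct{ bits uint64 }

func (a *atomicFloat64) Store(v float64) {
	atomic.StoreUint64(&a.bits, math.Float64bits(v))
}

func (a *atomicFloat64) Load() float64 {
	return math.Float64frombits(atomic.LoadUint64(&a.bits))
}

func main() {
	var assistWorkPerByte atomicFloat64 // name borrowed from the patch
	assistWorkPerByte.Store(0.25)
	fmt.Println(assistWorkPerByte.Load())
}

As the commit message notes, the two ratios are updated independently, so a reader may observe values from different updates; the pacer tolerates that skew because the assist ratio drifts slowly over a cycle.
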
@@ -637,7 +640,8 @@ func gcFlushBgCredit(scanWork int64) { return } - scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork) + assistBytesPerWork := float64frombits(atomic.Load64(&gcController.assistBytesPerWork)) + scanBytes := int64(float64(scanWork) * assistBytesPerWork) lock(&work.assistQueue.lock) for !work.assistQueue.q.empty() && scanBytes > 0 { @@ -670,7 +674,8 @@ func gcFlushBgCredit(scanWork int64) { if scanBytes > 0 { // Convert from scan bytes back to work. - scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanWork = int64(float64(scanBytes) * assistWorkPerByte) atomic.Xaddint64(&gcController.bgScanCredit, scanWork) } unlock(&work.assistQueue.lock) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index ec4e6d8751..ebecc92745 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3208,7 +3208,8 @@ func goexit0(gp *g) { // Flush assist credit to the global pool. This gives // better information to pacing if the application is // rapidly creating an exiting goroutines. - scanCredit := int64(gcController.assistWorkPerByte * float64(gp.gcAssistBytes)) + assistWorkPerByte := float64frombits(atomic.Load64(&gcController.assistWorkPerByte)) + scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes)) atomic.Xaddint64(&gcController.bgScanCredit, scanCredit) gp.gcAssistBytes = 0 } -- cgit v1.3 From d677899e903c4741920846f1af2c14c56f6e710e Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 23 Jul 2020 22:36:58 +0000 Subject: runtime: flush local_scan directly and more often Now that local_scan is the last mcache-based statistic that is flushed by purgecachedstats, and heap_scan and gcController.revise may be interacted with concurrently, we don't need to flush heap_scan at arbitrary locations where the heap is locked, and we don't need purgecachedstats and cachestats anymore. Instead, we can flush local_scan at the same time we update heap_live in refill, so the two updates may share the same revise call. Clean up unused functions, remove code that would cause the heap to get locked in the allocSpan when it didn't need to (other than to flush local_scan), and flush local_scan explicitly in a few important places. Notably we need to flush local_scan whenever we flush the other stats, but it doesn't need to be donated anywhere, so have releaseAll do the flushing. Also, we need to flush local_scan before we set heap_scan at the end of a GC, which was previously handled by cachestats. Just do so explicitly -- it's not much code and it becomes a lot more clear why we need to do so. Change-Id: I35ac081784df7744d515479896a41d530653692d Reviewed-on: https://go-review.googlesource.com/c/go/+/246968 Run-TryBot: Michael Knyszek Trust: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/mcache.go | 22 ++++++++++++++++++++-- src/runtime/mgc.go | 14 ++++++++++++-- src/runtime/mheap.go | 49 +++---------------------------------------------- src/runtime/mstats.go | 25 ------------------------- 4 files changed, 35 insertions(+), 75 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index fe603116a2..b8e388cc4f 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -124,7 +124,6 @@ func freemcache(c *mcache, recipient *mcache) { // gcworkbuffree(c.gcworkbuf) lock(&mheap_.lock) - purgecachedstats(c) // Donate anything else that's left. 
c.donate(recipient) mheap_.cachealloc.free(unsafe.Pointer(c)) @@ -135,6 +134,8 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { + // local_scan is handled separately because it's not + // like these stats -- it's used for GC pacing. d.local_largealloc += c.local_largealloc c.local_largealloc = 0 d.local_nlargealloc += c.local_nlargealloc @@ -192,14 +193,22 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. If it gets uncached, we'll adjust this. c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + + // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) + + // While we're here, flush local_scan, since we have to call + // revise anyway. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + if trace.enabled { // heap_live changed. traceHeapAlloc() } if gcBlackenEnabled != 0 { - // heap_live changed. + // heap_live and heap_scan changed. gcController.revise() } @@ -248,6 +257,10 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { + // Take this opportunity to flush local_scan. + atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) + c.local_scan = 0 + sg := mheap_.sweepgen for i := range c.alloc { s := c.alloc[i] @@ -273,6 +286,11 @@ func (c *mcache) releaseAll() { // Clear tinyalloc pool. c.tiny = 0 c.tinyoffset = 0 + + // Updated heap_scan and possible heap_live. + if gcBlackenEnabled != 0 { + gcController.revise() + } } // prepareForSweep flushes c if the system has entered a new sweep phase diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index c54f893689..55554c117c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2083,11 +2083,21 @@ func gcMark(start_time int64) { gcw.dispose() } - cachestats() - // Update the marked heap stat. memstats.heap_marked = work.bytesMarked + // Flush local_scan from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then local_scan + // might have incorrect information. + for _, p := range allp { + c := p.mcache + if c == nil { + continue + } + memstats.heap_scan += uint64(c.local_scan) + c.local_scan = 0 + } + // Update other GC heap size stats. This must happen after // cachestats (which flushes local statistics to these) and // flushallmcaches (which modifies heap_live). diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 47f86ee38c..40fd58b0ef 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -1102,23 +1102,11 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS base, scav = c.alloc(npages) if base != 0 { s = h.tryAllocMSpan() - - if s != nil && gcBlackenEnabled == 0 && (manual || spanclass.sizeclass() != 0) { + if s != nil { goto HaveSpan } - // We're either running duing GC, failed to acquire a mspan, - // or the allocation is for a large object. This means we - // have to lock the heap and do a bunch of extra work, - // so go down the HaveBaseLocked path. - // - // We must do this during GC to avoid skew with heap_scan - // since we flush mcache stats whenever we lock. - // - // TODO(mknyszek): It would be nice to not have to - // lock the heap if it's a large allocation, but - // it's fine for now. 
The critical section here is - // short and large object allocations are relatively - // infrequent. + // We have a base but no mspan, so we need + // to lock the heap. } } @@ -1145,30 +1133,6 @@ func (h *mheap) allocSpan(npages uintptr, manual bool, spanclass spanClass, sysS // one now that we have the heap lock. s = h.allocMSpanLocked() } - if !manual { - // This is a heap span, so we should do some additional accounting - // which may only be done with the heap locked. - - // Transfer stats from mcache to global. - var c *mcache - if gp.m.p != 0 { - c = gp.m.p.ptr().mcache - } else { - // This case occurs while bootstrapping. - // See the similar code in mallocgc. - c = mcache0 - if c == nil { - throw("mheap.allocSpan called with no P") - } - } - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 - - // heap_scan was been updated. - if gcBlackenEnabled != 0 { - gcController.revise() - } - } unlock(&h.lock) HaveSpan: @@ -1352,20 +1316,13 @@ func (h *mheap) grow(npage uintptr) bool { // Free the span back into the heap. func (h *mheap) freeSpan(s *mspan) { systemstack(func() { - c := getg().m.p.ptr().mcache lock(&h.lock) - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 if msanenabled { // Tell msan that this entire span is no longer in use. base := unsafe.Pointer(s.base()) bytes := s.npages << _PageShift msanfree(base, bytes) } - if gcBlackenEnabled != 0 { - // heap_scan changed. - gcController.revise() - } h.freeSpanLocked(s, true, true) unlock(&h.lock) }) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 341906fced..5eeb173640 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -556,9 +556,6 @@ func updatememstats() { memstats.by_size[i].nfree = 0 } - // Aggregate local stats. - cachestats() - // Collect allocation stats. This is safe and consistent // because the world is stopped. var smallFree, totalAlloc, totalFree uint64 @@ -602,21 +599,6 @@ func updatememstats() { memstats.heap_objects = memstats.nmalloc - memstats.nfree } -// cachestats flushes all mcache stats. -// -// The world must be stopped. -// -//go:nowritebarrier -func cachestats() { - for _, p := range allp { - c := p.mcache - if c == nil { - continue - } - purgecachedstats(c) - } -} - // flushmcache flushes the mcache of allp[i]. // // The world must be stopped. @@ -643,13 +625,6 @@ func flushallmcaches() { } } -//go:nosplit -func purgecachedstats(c *mcache) { - // Protected by heap lock. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 -} - // Atomically increases a given *system* memory stat. We are counting on this // stat never overflowing a uintptr, so this function must only be used for // system memory stats. -- cgit v1.3 From c8638498008f9874dc5a48734418e0fbea08cee9 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 24 Jul 2020 19:58:31 +0000 Subject: runtime: rename mcache fields to match Go style This change renames a bunch of malloc statistics stored in the mcache that are all named with the "local_" prefix. It also renames largeAlloc to allocLarge to prevent a naming conflict, and next_sample because it would be the last mcache field with the old C naming style. 
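The mcache statistics renamed here are an instance of the pattern the previous commit leans on: each P accumulates a count locally with no synchronization, and only the occasional flush into the global total is atomic. Below is a rough user-level sketch of that pattern, with illustrative names rather than the runtime's, assuming each localStats value is owned by exactly one goroutine.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

var globalScan uint64 // analogous to memstats.heap_scan; always updated atomically

// localStats plays the role of an mcache: owned by one worker, so scanAlloc
// needs no synchronization until it is flushed.
type localStats struct {
	scanAlloc uint64
}

// flush publishes the locally accumulated count and resets it, the way
// refill and releaseAll flush scanAlloc before the pacer reads heap_scan.
func (c *localStats) flush() {
	atomic.AddUint64(&globalScan, c.scanAlloc)
	c.scanAlloc = 0
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c := &localStats{}
			for j := 0; j < 1000; j++ {
				c.scanAlloc += 16 // cheap, unsynchronized local update
			}
			c.flush()
		}()
	}
	wg.Wait()
	fmt.Println("global scannable bytes:", atomic.LoadUint64(&globalScan))
}
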
Change-Id: I29695cb83b397a435ede7e9ad5c3c9be72767ea3 Reviewed-on: https://go-review.googlesource.com/c/go/+/246969 Trust: Michael Knyszek Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 14 ++++---- src/runtime/malloc.go | 12 +++---- src/runtime/mcache.go | 78 ++++++++++++++++++++--------------------- src/runtime/mgc.go | 8 ++--- src/runtime/mgcsweep.go | 6 ++-- src/runtime/mstats.go | 22 ++++++------ src/runtime/pprof/mprof_test.go | 2 +- 7 files changed, 71 insertions(+), 71 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index d71b180f76..47cbc286f6 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -346,18 +346,18 @@ func ReadMemStatsSlow() (base, slow MemStats) { continue } // Collect large allocation stats. - largeFree += uint64(c.local_largefree) - slow.Frees += uint64(c.local_nlargefree) + largeFree += uint64(c.largeFree) + slow.Frees += uint64(c.largeFreeCount) // Collect tiny allocation stats. - tinyAllocs += uint64(c.local_tinyallocs) + tinyAllocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { - slow.Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Frees += uint64(c.local_nsmallfree[i]) - bySize[i].Mallocs += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + slow.Frees += uint64(c.smallFreeCount[i]) + bySize[i].Frees += uint64(c.smallFreeCount[i]) + bySize[i].Mallocs += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } slow.Frees += tinyAllocs diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index ec601ccb39..0f48d7f68e 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1040,7 +1040,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // The object fits into existing tiny block. x = unsafe.Pointer(c.tiny + off) c.tinyoffset = off + size - c.local_tinyallocs++ + c.tinyAllocCount++ mp.mallocing = 0 releasem(mp) return x @@ -1082,7 +1082,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } } else { shouldhelpgc = true - span = c.largeAlloc(size, needzero, noscan) + span = c.allocLarge(size, needzero, noscan) span.freeindex = 1 span.allocCount = 1 x = unsafe.Pointer(span.base()) @@ -1111,7 +1111,7 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } else { scanSize = typ.ptrdata } - c.local_scan += scanSize + c.scanAlloc += scanSize } // Ensure that the stores above that initialize x to @@ -1153,8 +1153,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { } if rate := MemProfileRate; rate > 0 { - if rate != 1 && size < c.next_sample { - c.next_sample -= size + if rate != 1 && size < c.nextSample { + c.nextSample -= size } else { mp := acquirem() profilealloc(mp, x, size) @@ -1221,7 +1221,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { throw("profilealloc called with no P") } } - c.next_sample = nextSample() + c.nextSample = nextSample() mProf_Malloc(x, size) } diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index b8e388cc4f..c3e0e5e1f7 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -20,8 +20,8 @@ import ( type mcache struct { // The following members are accessed on every malloc, // so they are grouped here for better caching. 
- next_sample uintptr // trigger heap sample after allocating this many bytes - local_scan uintptr // bytes of scannable heap allocated + nextSample uintptr // trigger heap sample after allocating this many bytes + scanAlloc uintptr // bytes of scannable heap allocated // Allocator cache for tiny objects w/o pointers. // See "Tiny allocator" comment in malloc.go. @@ -48,13 +48,13 @@ type mcache struct { // When read with stats from other mcaches and with the world // stopped, the result will accurately reflect the state of the // application. - local_tinyallocs uintptr // number of tiny allocs not counted in other stats - local_largealloc uintptr // bytes allocated for large objects - local_nlargealloc uintptr // number of large object allocations - local_nsmallalloc [_NumSizeClasses]uintptr // number of allocs for small objects - local_largefree uintptr // bytes freed for large objects (>maxsmallsize) - local_nlargefree uintptr // number of frees for large objects (>maxsmallsize) - local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize) + tinyAllocCount uintptr // number of tiny allocs not counted in other stats + largeAlloc uintptr // bytes allocated for large objects + largeAllocCount uintptr // number of large object allocations + smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects + largeFree uintptr // bytes freed for large objects (>maxSmallSize) + largeFreeCount uintptr // number of frees for large objects (>maxSmallSize) + smallFreeCount [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize) // flushGen indicates the sweepgen during which this mcache // was last flushed. If flushGen != mheap_.sweepgen, the spans @@ -103,7 +103,7 @@ func allocmcache() *mcache { for i := range c.alloc { c.alloc[i] = &emptymspan } - c.next_sample = nextSample() + c.nextSample = nextSample() return c } @@ -134,26 +134,26 @@ func freemcache(c *mcache, recipient *mcache) { // donate flushes data and resources which have no global // pool to another mcache. func (c *mcache) donate(d *mcache) { - // local_scan is handled separately because it's not + // scanAlloc is handled separately because it's not // like these stats -- it's used for GC pacing. - d.local_largealloc += c.local_largealloc - c.local_largealloc = 0 - d.local_nlargealloc += c.local_nlargealloc - c.local_nlargealloc = 0 - for i := range c.local_nsmallalloc { - d.local_nsmallalloc[i] += c.local_nsmallalloc[i] - c.local_nsmallalloc[i] = 0 + d.largeAlloc += c.largeAlloc + c.largeAlloc = 0 + d.largeAllocCount += c.largeAllocCount + c.largeAllocCount = 0 + for i := range c.smallAllocCount { + d.smallAllocCount[i] += c.smallAllocCount[i] + c.smallAllocCount[i] = 0 } - d.local_largefree += c.local_largefree - c.local_largefree = 0 - d.local_nlargefree += c.local_nlargefree - c.local_nlargefree = 0 - for i := range c.local_nsmallfree { - d.local_nsmallfree[i] += c.local_nsmallfree[i] - c.local_nsmallfree[i] = 0 + d.largeFree += c.largeFree + c.largeFree = 0 + d.largeFreeCount += c.largeFreeCount + c.largeFreeCount = 0 + for i := range c.smallFreeCount { + d.smallFreeCount[i] += c.smallFreeCount[i] + c.smallFreeCount[i] = 0 } - d.local_tinyallocs += c.local_tinyallocs - c.local_tinyallocs = 0 + d.tinyAllocCount += c.tinyAllocCount + c.tinyAllocCount = 0 } // refill acquires a new span of span class spc for c. This span will @@ -192,16 +192,16 @@ func (c *mcache) refill(spc spanClass) { // Assume all objects from this span will be allocated in the // mcache. 
If it gets uncached, we'll adjust this. - c.local_nsmallalloc[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) + c.smallAllocCount[spc.sizeclass()] += uintptr(s.nelems) - uintptr(s.allocCount) // Update heap_live with the same assumption. usedBytes := uintptr(s.allocCount) * s.elemsize atomic.Xadd64(&memstats.heap_live, int64(s.npages*pageSize)-int64(usedBytes)) - // While we're here, flush local_scan, since we have to call + // While we're here, flush scanAlloc, since we have to call // revise anyway. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 if trace.enabled { // heap_live changed. @@ -215,8 +215,8 @@ func (c *mcache) refill(spc spanClass) { c.alloc[spc] = s } -// largeAlloc allocates a span for a large object. -func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { +// allocLarge allocates a span for a large object. +func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) *mspan { if size+_PageSize < size { throw("out of memory") } @@ -235,8 +235,8 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { if s == nil { throw("out of memory") } - c.local_largealloc += npages * pageSize - c.local_nlargealloc++ + c.largeAlloc += npages * pageSize + c.largeAllocCount++ // Update heap_live and revise pacing if needed. atomic.Xadd64(&memstats.heap_live, int64(npages*pageSize)) @@ -257,9 +257,9 @@ func (c *mcache) largeAlloc(size uintptr, needzero bool, noscan bool) *mspan { } func (c *mcache) releaseAll() { - // Take this opportunity to flush local_scan. - atomic.Xadd64(&memstats.heap_scan, int64(c.local_scan)) - c.local_scan = 0 + // Take this opportunity to flush scanAlloc. + atomic.Xadd64(&memstats.heap_scan, int64(c.scanAlloc)) + c.scanAlloc = 0 sg := mheap_.sweepgen for i := range c.alloc { @@ -267,7 +267,7 @@ func (c *mcache) releaseAll() { if s != &emptymspan { // Adjust nsmallalloc in case the span wasn't fully allocated. n := uintptr(s.nelems) - uintptr(s.allocCount) - c.local_nsmallalloc[spanClass(i).sizeclass()] -= n + c.smallAllocCount[spanClass(i).sizeclass()] -= n if s.sweepgen != sg+1 { // refill conservatively counted unallocated slots in heap_live. // Undo this. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 55554c117c..540c376f1c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2086,16 +2086,16 @@ func gcMark(start_time int64) { // Update the marked heap stat. memstats.heap_marked = work.bytesMarked - // Flush local_scan from each mcache since we're about to modify - // heap_scan directly. If we were to flush this later, then local_scan + // Flush scanAlloc from each mcache since we're about to modify + // heap_scan directly. If we were to flush this later, then scanAlloc // might have incorrect information. for _, p := range allp { c := p.mcache if c == nil { continue } - memstats.heap_scan += uint64(c.local_scan) - c.local_scan = 0 + memstats.heap_scan += uint64(c.scanAlloc) + c.scanAlloc = 0 } // Update other GC heap size stats. This must happen after diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 6b8c56ce35..7103b08455 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -503,7 +503,7 @@ func (s *mspan) sweep(preserve bool) bool { // wasn't totally filled, but then swept, still has all of its // free slots zeroed. 
s.needzero = 1 - c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) + c.smallFreeCount[spc.sizeclass()] += uintptr(nfreed) } if !preserve { // The caller may not have removed this span from whatever @@ -548,8 +548,8 @@ func (s *mspan) sweep(preserve bool) bool { } else { mheap_.freeSpan(s) } - c.local_nlargefree++ - c.local_largefree += size + c.largeFreeCount++ + c.largeFree += size return true } diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 5eeb173640..64687c24e5 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -565,25 +565,25 @@ func updatememstats() { continue } // Collect large allocation stats. - memstats.nmalloc += uint64(c.local_nlargealloc) - totalAlloc += uint64(c.local_largealloc) - totalFree += uint64(c.local_largefree) - memstats.nfree += uint64(c.local_nlargefree) + memstats.nmalloc += uint64(c.largeAllocCount) + totalAlloc += uint64(c.largeAlloc) + totalFree += uint64(c.largeFree) + memstats.nfree += uint64(c.largeFreeCount) // Collect tiny allocation stats. - memstats.tinyallocs += uint64(c.local_tinyallocs) + memstats.tinyallocs += uint64(c.tinyAllocCount) // Collect per-sizeclass stats. for i := 0; i < _NumSizeClasses; i++ { // Malloc stats. - memstats.nmalloc += uint64(c.local_nsmallalloc[i]) - memstats.by_size[i].nmalloc += uint64(c.local_nsmallalloc[i]) - totalAlloc += uint64(c.local_nsmallalloc[i]) * uint64(class_to_size[i]) + memstats.nmalloc += uint64(c.smallAllocCount[i]) + memstats.by_size[i].nmalloc += uint64(c.smallAllocCount[i]) + totalAlloc += uint64(c.smallAllocCount[i]) * uint64(class_to_size[i]) // Free stats. - memstats.nfree += uint64(c.local_nsmallfree[i]) - memstats.by_size[i].nfree += uint64(c.local_nsmallfree[i]) - smallFree += uint64(c.local_nsmallfree[i]) * uint64(class_to_size[i]) + memstats.nfree += uint64(c.smallFreeCount[i]) + memstats.by_size[i].nfree += uint64(c.smallFreeCount[i]) + smallFree += uint64(c.smallFreeCount[i]) * uint64(class_to_size[i]) } } diff --git a/src/runtime/pprof/mprof_test.go b/src/runtime/pprof/mprof_test.go index f253f07def..c11a45fd69 100644 --- a/src/runtime/pprof/mprof_test.go +++ b/src/runtime/pprof/mprof_test.go @@ -70,7 +70,7 @@ func TestMemoryProfiler(t *testing.T) { runtime.MemProfileRate = oldRate }() - // Allocate a meg to ensure that mcache.next_sample is updated to 1. + // Allocate a meg to ensure that mcache.nextSample is updated to 1. for i := 0; i < 1024; i++ { memSink = make([]byte, 1024) } -- cgit v1.3 From d39a89fd5843f535d634620d27110b320431f584 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 6 Aug 2020 21:59:13 +0000 Subject: runtime,runtime/metrics: add metric for distribution of GC pauses For #37112. 
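For context, a program would consume the new histogram roughly as follows once this lands. The metric name comes from the patch; everything else is ordinary runtime/metrics usage, and the sketch assumes a toolchain that already ships this metric.

package main

import (
	"fmt"
	"runtime"
	"runtime/metrics"
)

func main() {
	const name = "/gc/pauses:seconds" // metric name added by this patch
	samples := []metrics.Sample{{Name: name}}

	runtime.GC() // force at least one cycle so the histogram has data
	metrics.Read(samples)

	if samples[0].Value.Kind() != metrics.KindFloat64Histogram {
		fmt.Println("metric not supported by this toolchain")
		return
	}
	h := samples[0].Value.Float64Histogram()
	var pauses uint64
	for _, c := range h.Counts {
		pauses += c
	}
	fmt.Printf("%s: %d individual pauses recorded\n", name, pauses)
}

The consistency check added to metrics_test.go below works the same way: since each cycle has at least two stop-the-world pauses, the histogram's total count should be at least twice the reported GC count.
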
Change-Id: Ibb0425c9c582ae3da3b2662d5bbe830d7df9079c Reviewed-on: https://go-review.googlesource.com/c/go/+/247047 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/metrics.go | 9 +++++++++ src/runtime/metrics/description.go | 5 +++++ src/runtime/metrics/doc.go | 3 +++ src/runtime/metrics_test.go | 22 ++++++++++++++++++++++ src/runtime/mgc.go | 3 +++ src/runtime/mstats.go | 12 ++++++++++++ 6 files changed, 54 insertions(+) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 2be38ccaaa..0e391472b2 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -102,6 +102,15 @@ func initMetrics() { out.scalar = in.heapStats.numObjects }, }, + "/gc/pauses:seconds": { + compute: func(_ *statAggregate, out *metricValue) { + hist := out.float64HistOrInit(timeHistBuckets) + hist.counts[len(hist.counts)-1] = atomic.Load64(&memstats.gcPauseDist.overflow) + for i := range hist.buckets { + hist.counts[i] = atomic.Load64(&memstats.gcPauseDist.counts[i]) + } + }, + }, "/memory/classes/heap/free:bytes": { deps: makeStatDepSet(heapStatsDep), compute: func(in *statAggregate, out *metricValue) { diff --git a/src/runtime/metrics/description.go b/src/runtime/metrics/description.go index e43904fc7d..47959e467c 100644 --- a/src/runtime/metrics/description.go +++ b/src/runtime/metrics/description.go @@ -88,6 +88,11 @@ var allDesc = []Description{ Description: "Number of objects, live or unswept, occupying heap memory.", Kind: KindUint64, }, + { + Name: "/gc/pauses:seconds", + Description: "Distribution individual GC-related stop-the-world pause latencies.", + Kind: KindFloat64Histogram, + }, { Name: "/memory/classes/heap/free:bytes", Description: "Memory that is available for allocation, and may be returned to the underlying system.", diff --git a/src/runtime/metrics/doc.go b/src/runtime/metrics/doc.go index 5045a5b4c1..1e12ade5a1 100644 --- a/src/runtime/metrics/doc.go +++ b/src/runtime/metrics/doc.go @@ -65,6 +65,9 @@ Supported metrics /gc/heap/objects:objects Number of objects, live or unswept, occupying heap memory. + /gc/pauses:seconds + Distribution individual GC-related stop-the-world pause latencies. + /memory/classes/heap/free:bytes Memory that is available for allocation, and may be returned to the underlying system. diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 1a30810544..7b3132bc30 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -90,6 +90,11 @@ func TestReadMetricsConsistency(t *testing.T) { // things (e.g. allocating) so what we read can't reasonably compared // to runtime values. + // Run a few GC cycles to get some of the stats to be non-zero. + runtime.GC() + runtime.GC() + runtime.GC() + // Read all the supported metrics through the metrics package. 
descs, samples := prepareAllMetricsSamples() metrics.Read(samples) @@ -102,6 +107,10 @@ func TestReadMetricsConsistency(t *testing.T) { alloc, free *metrics.Float64Histogram total uint64 } + var gc struct { + numGC uint64 + pauses uint64 + } for i := range samples { kind := samples[i].Value.Kind() if want := descs[samples[i].Name].Kind; kind != want { @@ -128,6 +137,14 @@ func TestReadMetricsConsistency(t *testing.T) { objects.alloc = samples[i].Value.Float64Histogram() case "/gc/heap/frees-by-size:objects": objects.free = samples[i].Value.Float64Histogram() + case "/gc/cycles:gc-cycles": + gc.numGC = samples[i].Value.Uint64() + case "/gc/pauses:seconds": + h := samples[i].Value.Float64Histogram() + gc.pauses = 0 + for i := range h.Counts { + gc.pauses += h.Counts[i] + } } } if totalVirtual.got != totalVirtual.want { @@ -159,6 +176,11 @@ func TestReadMetricsConsistency(t *testing.T) { } } } + // The current GC has at least 2 pauses per GC. + // Check to see if that value makes sense. + if gc.pauses < gc.numGC*2 { + t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2) + } } func BenchmarkReadMetricsLatency(b *testing.B) { diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 540c376f1c..b0ab0ae6bb 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1418,6 +1418,7 @@ func gcStart(trigger gcTrigger) { now = startTheWorldWithSema(trace.enabled) work.pauseNS += now - work.pauseStart work.tMark = now + memstats.gcPauseDist.record(now - work.pauseStart) }) // Release the world sema before Gosched() in STW mode @@ -1565,6 +1566,7 @@ top: systemstack(func() { now := startTheWorldWithSema(true) work.pauseNS += now - work.pauseStart + memstats.gcPauseDist.record(now - work.pauseStart) }) semrelease(&worldsema) goto top @@ -1677,6 +1679,7 @@ func gcMarkTermination(nextTriggerRatio float64) { unixNow := sec*1e9 + int64(nsec) work.pauseNS += now - work.pauseStart work.tEnd = now + memstats.gcPauseDist.record(now - work.pauseStart) atomic.Store64(&memstats.last_gc_unix, uint64(unixNow)) // must be Unix time to make sense to user atomic.Store64(&memstats.last_gc_nanotime, uint64(now)) // monotonic time for us memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(work.pauseNS) diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index 07f466ec49..e0a417d213 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -157,6 +157,14 @@ type mstats struct { // heapStats is a set of statistics heapStats consistentHeapStats + + _ uint32 // ensure gcPauseDist is aligned + + // gcPauseDist represents the distribution of all GC-related + // application pauses in the runtime. + // + // Each individual pause is counted separately, unlike pause_ns. + gcPauseDist timeHistogram } var memstats mstats @@ -443,6 +451,10 @@ func init() { println(offset) throw("memstats.heapStats not aligned to 8 bytes") } + if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 { + println(offset) + throw("memstats.gcPauseDist not aligned to 8 bytes") + } // Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g. // [3]heapStatsDelta) to be 8-byte aligned. if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 { -- cgit v1.3 From e1faebe7b40c23811a6025ed104d3ce9882f0c3b Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 13 Oct 2020 12:39:13 -0400 Subject: runtime: manage gcBgMarkWorkers with a global pool Background mark workers perform per-P marking work. Currently each worker is assigned a P at creation time. 
The worker "attaches" to the P via p.gcBgMarkWorker, making itself (usually) available to findRunnableGCWorker for scheduling GC work. While running gcMarkDone, the worker "detaches" from the P (by clearing p.gcBgMarkWorker), since it may park for other reasons and should not be scheduled by findRunnableGCWorker. Unfortunately, this design is complex and difficult to reason about. We simplify things by changing the design to eliminate the hard P attachment. Rather than workers always performing work from the same P, workers perform work for whichever P they find themselves on. On park, the workers are placed in a pool of free workers, which each P's findRunnableGCWorker can use to run a worker for its P. Now if a worker parks in gcMarkDone, a P may simply use another worker from the pool to complete its own work. The P's GC worker mode is used to communicate the mode to run to the selected worker. It is also used to emit the appropriate worker EvGoStart tracepoint. This is a slight change, as this G may be preempted (e.g., in gcMarkDone). When it is rescheduled, the trace viewer will show it as a normal goroutine again. It is currently a bit difficult to connect to the original worker tracepoint, as the viewer does not display the goid for the original worker (though the data is in the trace file). Change-Id: Id7bd3a364dc18a4d2b1c99c4dc4810fae1293c1b Reviewed-on: https://go-review.googlesource.com/c/go/+/262348 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Trust: Michael Pratt --- src/runtime/mgc.go | 222 ++++++++++++++++++++++++++---------------------- src/runtime/proc.go | 69 +++++++++------ src/runtime/runtime2.go | 19 ++++- src/runtime/trace.go | 2 +- 4 files changed, 182 insertions(+), 130 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index b0ab0ae6bb..fabb846a74 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -290,10 +290,14 @@ func setGCPhase(x uint32) { type gcMarkWorkerMode int const ( + // gcMarkWorkerNotWorker indicates that the next scheduled G is not + // starting work and the mode should be ignored. + gcMarkWorkerNotWorker gcMarkWorkerMode = iota + // gcMarkWorkerDedicatedMode indicates that the P of a mark // worker is dedicated to running that mark worker. The mark // worker should run without preemption. - gcMarkWorkerDedicatedMode gcMarkWorkerMode = iota + gcMarkWorkerDedicatedMode // gcMarkWorkerFractionalMode indicates that a P is currently // running the "fractional" mark worker. The fractional worker @@ -313,6 +317,7 @@ const ( // gcMarkWorkerModeStrings are the strings labels of gcMarkWorkerModes // to use in execution traces. var gcMarkWorkerModeStrings = [...]string{ + "Not worker", "GC (dedicated)", "GC (fractional)", "GC (idle)", @@ -708,18 +713,12 @@ func (c *gcControllerState) enlistWorker() { } } -// findRunnableGCWorker returns the background mark worker for _p_ if it +// findRunnableGCWorker returns a background mark worker for _p_ if it // should be run. This must only be called when gcBlackenEnabled != 0. func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { if gcBlackenEnabled == 0 { throw("gcControllerState.findRunnable: blackening not enabled") } - if _p_.gcBgMarkWorker == 0 { - // The mark worker associated with this P is blocked - // performing a mark transition. We can't run it - // because it may be on some other run or wait queue. - return nil - } if !gcMarkWorkAvailable(_p_) { // No work to be done right now. 
This can happen at @@ -729,15 +728,35 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { return nil } + // Grab a worker before we commit to running below. + node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node == nil { + // There is at least one worker per P, so normally there are + // enough workers to run on all Ps, if necessary. However, once + // a worker enters gcMarkDone it may park without rejoining the + // pool, thus freeing a P with no corresponding worker. + // gcMarkDone never depends on another worker doing work, so it + // is safe to simply do nothing here. + // + // If gcMarkDone bails out without completing the mark phase, + // it will always do so with queued global work. Thus, that P + // will be immediately eligible to re-run the worker G it was + // just using, ensuring work can complete. + return nil + } + decIfPositive := func(ptr *int64) bool { - if *ptr > 0 { - if atomic.Xaddint64(ptr, -1) >= 0 { + for { + v := atomic.Loadint64(ptr) + if v <= 0 { + return false + } + + // TODO: having atomic.Casint64 would be more pleasant. + if atomic.Cas64((*uint64)(unsafe.Pointer(ptr)), uint64(v), uint64(v-1)) { return true } - // We lost a race - atomic.Xaddint64(ptr, +1) } - return false } if decIfPositive(&c.dedicatedMarkWorkersNeeded) { @@ -746,6 +765,7 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { _p_.gcMarkWorkerMode = gcMarkWorkerDedicatedMode } else if c.fractionalUtilizationGoal == 0 { // No need for fractional workers. + gcBgMarkWorkerPool.push(&node.node) return nil } else { // Is this P behind on the fractional utilization @@ -755,14 +775,15 @@ func (c *gcControllerState) findRunnableGCWorker(_p_ *p) *g { delta := nanotime() - gcController.markStartTime if delta > 0 && float64(_p_.gcFractionalMarkTime)/float64(delta) > c.fractionalUtilizationGoal { // Nope. No need to run a fractional worker. + gcBgMarkWorkerPool.push(&node.node) return nil } // Run a fractional worker. _p_.gcMarkWorkerMode = gcMarkWorkerFractionalMode } - // Run the background mark worker - gp := _p_.gcBgMarkWorker.ptr() + // Run the background mark worker. + gp := node.gp.ptr() casgstatus(gp, _Gwaiting, _Grunnable) if trace.enabled { traceGoUnpark(gp, 0) @@ -1796,19 +1817,21 @@ func gcMarkTermination(nextTriggerRatio float64) { } } -// gcBgMarkStartWorkers prepares background mark worker goroutines. -// These goroutines will not run until the mark phase, but they must -// be started while the work is not stopped and from a regular G -// stack. The caller must hold worldsema. +// gcBgMarkStartWorkers prepares background mark worker goroutines. These +// goroutines will not run until the mark phase, but they must be started while +// the work is not stopped and from a regular G stack. The caller must hold +// worldsema. func gcBgMarkStartWorkers() { - // Background marking is performed by per-P G's. Ensure that - // each P has a background GC G. - for _, p := range allp { - if p.gcBgMarkWorker == 0 { - go gcBgMarkWorker(p) - notetsleepg(&work.bgMarkReady, -1) - noteclear(&work.bgMarkReady) - } + // Background marking is performed by per-P G's. Ensure that each P has + // a background GC G. + // + // Worker Gs don't exit if gomaxprocs is reduced. If it is raised + // again, we can reuse the old workers; no need to create new workers. 
+ for gcBgMarkWorkerCount < gomaxprocs { + go gcBgMarkWorker() + notetsleepg(&work.bgMarkReady, -1) + noteclear(&work.bgMarkReady) + gcBgMarkWorkerCount++ } } @@ -1828,82 +1851,81 @@ func gcBgMarkPrepare() { work.nwait = ^uint32(0) } -func gcBgMarkWorker(_p_ *p) { +// gcBgMarkWorker is an entry in the gcBgMarkWorkerPool. It points to a single +// gcBgMarkWorker goroutine. +type gcBgMarkWorkerNode struct { + // Unused workers are managed in a lock-free stack. This field must be first. + node lfnode + + // The g of this worker. + gp guintptr + + // Release this m on park. This is used to communicate with the unlock + // function, which cannot access the G's stack. It is unused outside of + // gcBgMarkWorker(). + m muintptr +} + +func gcBgMarkWorker() { gp := getg() - type parkInfo struct { - m muintptr // Release this m on park. - attach puintptr // If non-nil, attach to this p on park. - } - // We pass park to a gopark unlock function, so it can't be on + // We pass node to a gopark unlock function, so it can't be on // the stack (see gopark). Prevent deadlock from recursively // starting GC by disabling preemption. gp.m.preemptoff = "GC worker init" - park := new(parkInfo) + node := new(gcBgMarkWorkerNode) gp.m.preemptoff = "" - park.m.set(acquirem()) - park.attach.set(_p_) - // Inform gcBgMarkStartWorkers that this worker is ready. - // After this point, the background mark worker is scheduled - // cooperatively by gcController.findRunnable. Hence, it must - // never be preempted, as this would put it into _Grunnable - // and put it on a run queue. Instead, when the preempt flag - // is set, this puts itself into _Gwaiting to be woken up by - // gcController.findRunnable at the appropriate time. + node.gp.set(gp) + node.m.set(acquirem()) + // Inform gcBgMarkStartWorkers that this worker is ready. After this + // point, the background mark worker is scheduled cooperatively by + // gcController.findRunnableGCWorker. Hence, it must never be + // preempted, as this would put it into _Grunnable and put it on a run + // queue. Instead, when the preempt flag is set, this puts itself into + // _Gwaiting to be woken up by gcController.findRunnableGCWorker at the + // appropriate time. notewakeup(&work.bgMarkReady) for { - // Go to sleep until woken by gcController.findRunnable. - // We can't releasem yet since even the call to gopark - // may be preempted. - gopark(func(g *g, parkp unsafe.Pointer) bool { - park := (*parkInfo)(parkp) + // Go to sleep until woken by + // gcController.findRunnableGCWorker. We can't releasem yet + // since even the call to gopark may be preempted. + gopark(func(g *g, nodep unsafe.Pointer) bool { + node := (*gcBgMarkWorkerNode)(nodep) // The worker G is no longer running, so it's // now safe to allow preemption. - releasem(park.m.ptr()) - - // If the worker isn't attached to its P, - // attach now. During initialization and after - // a phase change, the worker may have been - // running on a different P. As soon as we - // attach, the owner P may schedule the - // worker, so this must be done after the G is - // stopped. - if park.attach != 0 { - p := park.attach.ptr() - park.attach.set(nil) - // cas the worker because we may be - // racing with a new worker starting - // on this P. - if !p.gcBgMarkWorker.cas(0, guintptr(unsafe.Pointer(g))) { - // The P got a new worker. - // Exit this worker. - return false - } - } + releasem(node.m.ptr()) + + // Release this G to the pool. 
+ gcBgMarkWorkerPool.push(&node.node) + // Note that at this point, the G may immediately be + // rescheduled and may be running. + return true - }, unsafe.Pointer(park), waitReasonGCWorkerIdle, traceEvGoBlock, 0) + }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) - // Loop until the P dies and disassociates this - // worker (the P may later be reused, in which case - // it will get a new worker) or we failed to associate. - if _p_.gcBgMarkWorker.ptr() != gp { - break - } + // Preemption must not occur here, or another G might see + // p.gcMarkWorkerMode. // Disable preemption so we can use the gcw. If the // scheduler wants to preempt us, we'll stop draining, // dispose the gcw, and then preempt. - park.m.set(acquirem()) + node.m.set(acquirem()) + pp := gp.m.p.ptr() // P can't change with preemption disabled. if gcBlackenEnabled == 0 { + println("worker mode", pp.gcMarkWorkerMode) throw("gcBgMarkWorker: blackening not enabled") } + if pp.gcMarkWorkerMode == gcMarkWorkerNotWorker { + throw("gcBgMarkWorker: mode not set") + } + startTime := nanotime() - _p_.gcMarkWorkerStartTime = startTime + pp.gcMarkWorkerStartTime = startTime decnwait := atomic.Xadd(&work.nwait, -1) if decnwait == work.nproc { @@ -1920,11 +1942,11 @@ func gcBgMarkWorker(_p_ *p) { // disabled for mark workers, so it is safe to // read from the G stack. casgstatus(gp, _Grunning, _Gwaiting) - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { default: throw("gcBgMarkWorker: unexpected gcMarkWorkerMode") case gcMarkWorkerDedicatedMode: - gcDrain(&_p_.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainUntilPreempt|gcDrainFlushBgCredit) if gp.preempt { // We were preempted. This is // a useful signal to kick @@ -1933,7 +1955,7 @@ func gcBgMarkWorker(_p_ *p) { // somewhere else. lock(&sched.lock) for { - gp, _ := runqget(_p_) + gp, _ := runqget(pp) if gp == nil { break } @@ -1943,24 +1965,24 @@ func gcBgMarkWorker(_p_ *p) { } // Go back to draining, this time // without preemption. - gcDrain(&_p_.gcw, gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFlushBgCredit) case gcMarkWorkerFractionalMode: - gcDrain(&_p_.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainFractional|gcDrainUntilPreempt|gcDrainFlushBgCredit) case gcMarkWorkerIdleMode: - gcDrain(&_p_.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) + gcDrain(&pp.gcw, gcDrainIdle|gcDrainUntilPreempt|gcDrainFlushBgCredit) } casgstatus(gp, _Gwaiting, _Grunning) }) // Account for time. duration := nanotime() - startTime - switch _p_.gcMarkWorkerMode { + switch pp.gcMarkWorkerMode { case gcMarkWorkerDedicatedMode: atomic.Xaddint64(&gcController.dedicatedMarkTime, duration) atomic.Xaddint64(&gcController.dedicatedMarkWorkersNeeded, 1) case gcMarkWorkerFractionalMode: atomic.Xaddint64(&gcController.fractionalMarkTime, duration) - atomic.Xaddint64(&_p_.gcFractionalMarkTime, duration) + atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) case gcMarkWorkerIdleMode: atomic.Xaddint64(&gcController.idleMarkTime, duration) } @@ -1969,31 +1991,31 @@ func gcBgMarkWorker(_p_ *p) { // of work? incnwait := atomic.Xadd(&work.nwait, +1) if incnwait > work.nproc { - println("runtime: p.gcMarkWorkerMode=", _p_.gcMarkWorkerMode, + println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, "work.nwait=", incnwait, "work.nproc=", work.nproc) throw("work.nwait > work.nproc") } + // We'll releasem after this point and thus this P may run + // something else. 
We must clear the worker mode to avoid + // attributing the mode to a different (non-worker) G in + // traceGoStart. + pp.gcMarkWorkerMode = gcMarkWorkerNotWorker + // If this worker reached a background mark completion // point, signal the main GC goroutine. if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // Make this G preemptible and disassociate it - // as the worker for this P so - // findRunnableGCWorker doesn't try to - // schedule it. - _p_.gcBgMarkWorker.set(nil) - releasem(park.m.ptr()) + // Make this G preemptible since we are done with per-P + // work. + releasem(node.m.ptr()) gcMarkDone() - // Disable preemption and prepare to reattach - // to the P. + // Disable preemption and prepare to park. // - // We may be running on a different P at this - // point, so we can't reattach until this G is - // parked. - park.m.set(acquirem()) - park.attach.set(_p_) + // Note that we may be running on a different P at this + // point, so we can't use pp. + node.m.set(acquirem()) } } } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 64c891d007..c97f4820da 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2554,14 +2554,17 @@ stop: // We have nothing to do. If we're in the GC mark phase, can // safely scan and blacken objects, and have work to do, run // idle-time marking rather than give up the P. - if gcBlackenEnabled != 0 && _p_.gcBgMarkWorker != 0 && gcMarkWorkAvailable(_p_) { - _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode - gp := _p_.gcBgMarkWorker.ptr() - casgstatus(gp, _Gwaiting, _Grunnable) - if trace.enabled { - traceGoUnpark(gp, 0) + if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) { + node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node != nil { + _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode + gp := node.gp.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false } - return gp, false } delta := int64(-1) @@ -2681,12 +2684,33 @@ stop: } // Check for idle-priority GC work again. - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(nil) { + // + // N.B. Since we have no P, gcBlackenEnabled may change at any time; we + // must check again after acquiring a P. + if atomic.Load(&gcBlackenEnabled) != 0 && gcMarkWorkAvailable(nil) { + // Work is available; we can start an idle GC worker only if + // there is an available P and available worker G. + // + // We can attempt to acquire these in either order. Workers are + // almost always available (see comment in findRunnableGCWorker + // for the one case there may be none). Since we're slightly + // less likely to find a P, check for that first. lock(&sched.lock) + var node *gcBgMarkWorkerNode _p_ = pidleget() - if _p_ != nil && _p_.gcBgMarkWorker == 0 { - pidleput(_p_) - _p_ = nil + if _p_ != nil { + // Now that we own a P, gcBlackenEnabled can't change + // (as it requires STW). + if gcBlackenEnabled != 0 { + node = (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) + if node == nil { + pidleput(_p_) + _p_ = nil + } + } else { + pidleput(_p_) + _p_ = nil + } } unlock(&sched.lock) if _p_ != nil { @@ -2695,8 +2719,15 @@ stop: _g_.m.spinning = true atomic.Xadd(&sched.nmspinning, 1) } - // Go back to idle GC check. - goto stop + + // Run the idle worker. 
+ _p_.gcMarkWorkerMode = gcMarkWorkerIdleMode + gp := node.gp.ptr() + casgstatus(gp, _Gwaiting, _Grunnable) + if trace.enabled { + traceGoUnpark(gp, 0) + } + return gp, false } } @@ -4547,18 +4578,6 @@ func (pp *p) destroy() { unlock(&pp.timersLock) unlock(&plocal.timersLock) } - // If there's a background worker, make it runnable and put - // it on the global queue so it can clean itself up. - if gp := pp.gcBgMarkWorker.ptr(); gp != nil { - casgstatus(gp, _Gwaiting, _Grunnable) - if trace.enabled { - traceGoUnpark(gp, 0) - } - globrunqput(gp) - // This assignment doesn't race because the - // world is stopped. - pp.gcBgMarkWorker.set(nil) - } // Flush p's write barrier buffer. if gcphase != _GCoff { wbBufFlush1(pp) diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index 2dbc0efca3..82fedd804b 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -656,11 +656,15 @@ type p struct { // Per-P GC state gcAssistTime int64 // Nanoseconds in assistAlloc gcFractionalMarkTime int64 // Nanoseconds in fractional mark worker (atomic) - gcBgMarkWorker guintptr // (atomic) - gcMarkWorkerMode gcMarkWorkerMode - // gcMarkWorkerStartTime is the nanotime() at which this mark - // worker started. + // gcMarkWorkerMode is the mode for the next mark worker to run in. + // That is, this is used to communicate with the worker goroutine + // selected for immediate execution by + // gcController.findRunnableGCWorker. When scheduling other goroutines, + // this field must be set to gcMarkWorkerNotWorker. + gcMarkWorkerMode gcMarkWorkerMode + // gcMarkWorkerStartTime is the nanotime() at which the most recent + // mark worker started. gcMarkWorkerStartTime int64 // gcw is this P's GC work buffer cache. The work buffer is @@ -1073,6 +1077,13 @@ var ( // must be atomic. Length may change at safe points. timerpMask pMask + // Pool of GC parked background workers. Entries are type + // *gcBgMarkWorkerNode. + gcBgMarkWorkerPool lfstack + + // Total number of gcBgMarkWorker goroutines. Protected by worldsema. + gcBgMarkWorkerCount int32 + // Information about what cpu features are available. // Packages outside the runtime should not use these // as they are not an external api. diff --git a/src/runtime/trace.go b/src/runtime/trace.go index d3ecd148be..bcd0b9d56c 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -1064,7 +1064,7 @@ func traceGoStart() { _g_ := getg().m.curg _p_ := _g_.m.p _g_.traceseq++ - if _g_ == _p_.ptr().gcBgMarkWorker.ptr() { + if _p_.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker { traceEvent(traceEvGoStartLabel, -1, uint64(_g_.goid), _g_.traceseq, trace.markWorkerLabels[_p_.ptr().gcMarkWorkerMode]) } else if _g_.tracelastp == _p_ { traceEvent(traceEvGoStartLocal, -1, uint64(_g_.goid)) -- cgit v1.3 From 256d729c0b272021a44f61f47cd2c9c6d9fb1722 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 13 Oct 2020 18:11:26 -0400 Subject: runtime: simplify gcBgMarkWorker preemption gcBgMarkWorker G's are primarily scheduled by findRunnableGCWorker, but that no longer needs to be strictly enforced. Temporary preemption to a runq is fine when the P is not in use. We still releasem in gopark in the normal case for efficiency: if gcDrain stops because gp.preempt is set, then gopark would always preempt. That is fine, but inefficient, since it will reschedule simply to park again. Thus, we keep releasem in unlockf to skip this extra cycle. 
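To illustrate the park/wake shape this relies on outside the runtime, the sketch below uses ordinary channels as stand-ins for gcBgMarkWorkerPool and findRunnableGCWorker; the names, modes, and channel-based pool are illustrative assumptions, not runtime code. Workers push themselves onto a free pool and block, and a dispatcher pops a parked worker and tells it which mode to run in, roughly the handshake findRunnableGCWorker performs via p.gcMarkWorkerMode.

package main

import (
	"fmt"
	"sync"
)

// worker parks itself in the pool and waits to be told which mode to run in.
type worker struct {
	id   int
	wake chan string
}

func main() {
	const nWorkers = 4
	pool := make(chan *worker, nWorkers) // stand-in for gcBgMarkWorkerPool

	var wg sync.WaitGroup
	for i := 0; i < nWorkers; i++ {
		w := &worker{id: i, wake: make(chan string)}
		wg.Add(1)
		go func(w *worker) {
			defer wg.Done()
			for {
				pool <- w // "park": put ourselves back in the free pool
				mode, ok := <-w.wake
				if !ok {
					return // pool is shutting down
				}
				fmt.Printf("worker %d running in %q mode\n", w.id, mode)
			}
		}(w)
	}

	// Dispatcher: roughly what findRunnableGCWorker does: pop a parked
	// worker (if any) and communicate which mode it should run in.
	for _, mode := range []string{"dedicated", "fractional", "idle"} {
		w := <-pool
		w.wake <- mode
	}

	// Shut down: every worker eventually parks again, so draining the pool
	// and closing each wake channel stops them all.
	for i := 0; i < nWorkers; i++ {
		close((<-pool).wake)
	}
	wg.Wait()
}

A worker that is temporarily preempted in this model simply runs, parks again, and becomes schedulable from the pool, which is why strict enforcement of findRunnableGCWorker-only scheduling is no longer needed.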
Change-Id: I6d1a42e3ca41b76227142a6b5bfb376c9213e3c9 Reviewed-on: https://go-review.googlesource.com/c/go/+/262349 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Trust: Michael Pratt --- src/runtime/mgc.go | 67 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 22 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index fabb846a74..9d2682f03c 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -1829,8 +1829,12 @@ func gcBgMarkStartWorkers() { // again, we can reuse the old workers; no need to create new workers. for gcBgMarkWorkerCount < gomaxprocs { go gcBgMarkWorker() + notetsleepg(&work.bgMarkReady, -1) noteclear(&work.bgMarkReady) + // The worker is now guaranteed to be added to the pool before + // its P's next findRunnableGCWorker. + gcBgMarkWorkerCount++ } } @@ -1877,32 +1881,55 @@ func gcBgMarkWorker() { gp.m.preemptoff = "" node.gp.set(gp) + node.m.set(acquirem()) - // Inform gcBgMarkStartWorkers that this worker is ready. After this - // point, the background mark worker is scheduled cooperatively by - // gcController.findRunnableGCWorker. Hence, it must never be - // preempted, as this would put it into _Grunnable and put it on a run - // queue. Instead, when the preempt flag is set, this puts itself into - // _Gwaiting to be woken up by gcController.findRunnableGCWorker at the - // appropriate time. notewakeup(&work.bgMarkReady) + // After this point, the background mark worker is generally scheduled + // cooperatively by gcController.findRunnableGCWorker. While performing + // work on the P, preemption is disabled because we are working on + // P-local work buffers. When the preempt flag is set, this puts itself + // into _Gwaiting to be woken up by gcController.findRunnableGCWorker + // at the appropriate time. + // + // When preemption is enabled (e.g., while in gcMarkDone), this worker + // may be preempted and schedule as a _Grunnable G from a runq. That is + // fine; it will eventually gopark again for further scheduling via + // findRunnableGCWorker. + // + // Since we disable preemption before notifying bgMarkReady, we + // guarantee that this G will be in the worker pool for the next + // findRunnableGCWorker. This isn't strictly necessary, but it reduces + // latency between _GCmark starting and the workers starting. for { // Go to sleep until woken by - // gcController.findRunnableGCWorker. We can't releasem yet - // since even the call to gopark may be preempted. + // gcController.findRunnableGCWorker. gopark(func(g *g, nodep unsafe.Pointer) bool { node := (*gcBgMarkWorkerNode)(nodep) - // The worker G is no longer running, so it's - // now safe to allow preemption. - releasem(node.m.ptr()) + if mp := node.m.ptr(); mp != nil { + // The worker G is no longer running; release + // the M. + // + // N.B. it is _safe_ to release the M as soon + // as we are no longer performing P-local mark + // work. + // + // However, since we cooperatively stop work + // when gp.preempt is set, if we releasem in + // the loop then the following call to gopark + // would immediately preempt the G. This is + // also safe, but inefficient: the G must + // schedule again only to enter gopark and park + // again. Thus, we defer the release until + // after parking the G. + releasem(mp) + } // Release this G to the pool. gcBgMarkWorkerPool.push(&node.node) // Note that at this point, the G may immediately be // rescheduled and may be running. 
- return true }, unsafe.Pointer(node), waitReasonGCWorkerIdle, traceEvGoBlock, 0) @@ -1913,7 +1940,7 @@ func gcBgMarkWorker() { // scheduler wants to preempt us, we'll stop draining, // dispose the gcw, and then preempt. node.m.set(acquirem()) - pp := gp.m.p.ptr() // P can't change with preemption disabled. + pp := gp.m.p.ptr() // P can't change with preemption disabled. if gcBlackenEnabled == 0 { println("worker mode", pp.gcMarkWorkerMode) @@ -2005,17 +2032,13 @@ func gcBgMarkWorker() { // If this worker reached a background mark completion // point, signal the main GC goroutine. if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { - // Make this G preemptible since we are done with per-P - // work. + // We don't need the P-local buffers here, allow + // preemption becuse we may schedule like a regular + // goroutine in gcMarkDone (block on locks, etc). releasem(node.m.ptr()) + node.m.set(nil) gcMarkDone() - - // Disable preemption and prepare to park. - // - // Note that we may be running on a different P at this - // point, so we can't use pp. - node.m.set(acquirem()) } } } -- cgit v1.3 From 6abbfc17c255c07134a69c3ca305231db80530ec Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 28 Oct 2020 18:06:05 -0400 Subject: runtime: add world-stopped assertions Stopping the world is an implicit lock for many operations, so we should assert the world is stopped in functions that require it. This is enabled along with the rest of lock ranking, though it is a bit orthogonal and likely cheap enough to enable all the time should we choose. Requiring a lock _or_ world stop is common, so that can be expressed as well. Updates #40677 Change-Id: If0a58544f4251d367f73c4120c9d39974c6cd091 Reviewed-on: https://go-review.googlesource.com/c/go/+/248577 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/heapdump.go | 15 +++++++++ src/runtime/lockrank_off.go | 16 +++++++++ src/runtime/lockrank_on.go | 79 +++++++++++++++++++++++++++++++++++++++++++++ src/runtime/mcheckmark.go | 2 ++ src/runtime/mgc.go | 2 ++ src/runtime/mgcmark.go | 4 +++ src/runtime/mgcsweep.go | 2 ++ src/runtime/mstats.go | 14 ++++++-- src/runtime/proc.go | 14 ++++++++ 9 files changed, 145 insertions(+), 3 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index 33e224d587..2d531571aa 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -431,6 +431,9 @@ func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, o } func dumproots() { + // To protect mheap_.allspans. + assertWorldStopped() + // TODO(mwhudson): dump datamask etc from all objects // data segment dumpint(tagData) @@ -468,6 +471,9 @@ func dumproots() { var freemark [_PageSize / 8]bool func dumpobjs() { + // To protect mheap_.allspans. + assertWorldStopped() + for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { continue @@ -552,6 +558,8 @@ func dumpms() { //go:systemstack func dumpmemstats(m *MemStats) { + assertWorldStopped() + // These ints should be identical to the exported // MemStats structure and should be ordered the same // way too. @@ -634,6 +642,9 @@ func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, } func dumpmemprof() { + // To protect mheap_.allspans. 
+ assertWorldStopped() + iterate_memprof(dumpmemprof_callback) for _, s := range mheap_.allspans { if s.state.get() != mSpanInUse { @@ -655,6 +666,8 @@ func dumpmemprof() { var dumphdr = []byte("go1.7 heap dump\n") func mdump(m *MemStats) { + assertWorldStopped() + // make sure we're done sweeping for _, s := range mheap_.allspans { if s.state.get() == mSpanInUse { @@ -676,6 +689,8 @@ func mdump(m *MemStats) { } func writeheapdump_m(fd uintptr, m *MemStats) { + assertWorldStopped() + _g_ := getg() casgstatus(_g_.m.curg, _Grunning, _Gwaiting) _g_.waitreason = waitReasonDumpingHeap diff --git a/src/runtime/lockrank_off.go b/src/runtime/lockrank_off.go index 40edf882ee..7dcd8f5fe9 100644 --- a/src/runtime/lockrank_off.go +++ b/src/runtime/lockrank_off.go @@ -46,3 +46,19 @@ func assertLockHeld(l *mutex) { //go:nosplit func assertRankHeld(r lockRank) { } + +//go:nosplit +func worldStopped() { +} + +//go:nosplit +func worldStarted() { +} + +//go:nosplit +func assertWorldStopped() { +} + +//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { +} diff --git a/src/runtime/lockrank_on.go b/src/runtime/lockrank_on.go index db7ff23a58..c25b3a4656 100644 --- a/src/runtime/lockrank_on.go +++ b/src/runtime/lockrank_on.go @@ -7,9 +7,14 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) +// worldIsStopped is accessed atomically to track world-stops. 1 == world +// stopped. +var worldIsStopped uint32 + // lockRankStruct is embedded in mutex type lockRankStruct struct { // static lock ranking of the lock @@ -284,3 +289,77 @@ func assertRankHeld(r lockRank) { throw("not holding required lock!") }) } + +// worldStopped notes that the world is stopped. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStopped() { + if stopped := atomic.Xadd(&worldIsStopped, 1); stopped != 1 { + print("world stop count=", stopped, "\n") + throw("recursive world stop") + } +} + +// worldStarted that the world is starting. +// +// Caller must hold worldsema. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func worldStarted() { + if stopped := atomic.Xadd(&worldIsStopped, -1); stopped != 0 { + print("world stop count=", stopped, "\n") + throw("released non-stopped world stop") + } +} + +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func checkWorldStopped() bool { + stopped := atomic.Load(&worldIsStopped) + if stopped > 1 { + print("inconsistent world stop count=", stopped, "\n") + throw("inconsistent world stop count") + } + + return stopped == 1 +} + +// assertWorldStopped throws if the world is not stopped. It does not check +// which M stopped the world. +// +// nosplit to ensure it can be called in as many contexts as possible. +//go:nosplit +func assertWorldStopped() { + if checkWorldStopped() { + return + } + + throw("world not stopped") +} + +// assertWorldStoppedOrLockHeld throws if the world is not stopped and the +// passed lock is not held. +// +// nosplit to ensure it can be called in as many contexts as possible. 
+//go:nosplit +func assertWorldStoppedOrLockHeld(l *mutex) { + if checkWorldStopped() { + return + } + + gp := getg() + systemstack(func() { + held := checkLockHeld(gp, l) + if !held { + printlock() + print("caller requires world stop or lock ", l, " (rank ", l.rank.String(), "), holding:\n") + println("") + printHeldLocks(gp) + throw("no world stop or required lock!") + } + }) +} diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index c0b028d715..ba80ac1bdf 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -34,6 +34,8 @@ var useCheckmark = false // // The world must be stopped. func startCheckmarks() { + assertWorldStopped() + // Clear all checkmarks. for _, ai := range mheap_.allArenas { arena := mheap_.arenas[ai.l1()][ai.l2()] diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 9d2682f03c..fb3c149942 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -2164,6 +2164,8 @@ func gcMark(start_time int64) { // //go:systemstack func gcSweep(mode gcMode) { + assertWorldStopped() + if gcphase != _GCoff { throw("gcSweep being done but phase is not GCoff") } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index c71c0e58d3..5a24cdac88 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -54,6 +54,8 @@ const ( // // The world must be stopped. func gcMarkRootPrepare() { + assertWorldStopped() + work.nFlushCacheRoots = 0 // Compute how many data and BSS root blocks there are. @@ -1535,6 +1537,8 @@ func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) { // // The world must be stopped. func gcMarkTinyAllocs() { + assertWorldStopped() + for _, p := range allp { c := p.mcache if c == nil || c.tiny == 0 { diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 9b77ce635c..8391435630 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -123,6 +123,8 @@ func (h *mheap) nextSpanForSweep() *mspan { // //go:nowritebarrier func finishsweep_m() { + assertWorldStopped() + // Sweeping must be complete before marking commences, so // sweep any unswept spans. If this is a concurrent GC, there // shouldn't be any spans left to sweep, so this should finish diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index e0a417d213..3829355d7b 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -601,6 +601,8 @@ func readGCStats_m(pauses *[]uint64) { // //go:nowritebarrier func updatememstats() { + assertWorldStopped() + // Flush mcaches to mcentral before doing anything else. // // Flushing to the mcentral may in general cause stats to @@ -706,6 +708,8 @@ func updatememstats() { // //go:nowritebarrier func flushmcache(i int) { + assertWorldStopped() + p := allp[i] c := p.mcache if c == nil { @@ -721,6 +725,8 @@ func flushmcache(i int) { // //go:nowritebarrier func flushallmcaches() { + assertWorldStopped() + for i := 0; i < int(gomaxprocs); i++ { flushmcache(i) } @@ -876,10 +882,10 @@ func (m *consistentHeapStats) release(c *mcache) { // unsafeRead aggregates the delta for this shard into out. // // Unsafe because it does so without any synchronization. The -// only safe time to call this is if the world is stopped or -// we're freezing the world or going down anyway (and we just -// want _some_ estimate). +// world must be stopped. 
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { + assertWorldStopped() + for i := range m.stats { out.merge(&m.stats[i]) } @@ -890,6 +896,8 @@ func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) { // Unsafe because the world must be stopped and values should // be donated elsewhere before clearing. func (m *consistentHeapStats) unsafeClear() { + assertWorldStopped() + for i := range m.stats { m.stats[i] = heapStatsDelta{} } diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 939757f3a7..82284e6cd6 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -587,6 +587,9 @@ func schedinit() { sched.maxmcount = 10000 + // The world starts stopped. + worldStopped() + moduledataverify() stackinit() mallocinit() @@ -617,6 +620,9 @@ func schedinit() { } unlock(&sched.lock) + // World is effectively started now, as P's can run. + worldStarted() + // For cgocheck > 1, we turn on the write barrier at all times // and check all pointer writes. We can't do this until after // procresize because the write barrier needs a P. @@ -1082,9 +1088,13 @@ func stopTheWorldWithSema() { if bad != "" { throw(bad) } + + worldStopped() } func startTheWorldWithSema(emitTraceEvent bool) int64 { + assertWorldStopped() + mp := acquirem() // disable preemption because it can be holding p in a local var if netpollinited() { list := netpoll(0) // non-blocking @@ -1105,6 +1115,8 @@ func startTheWorldWithSema(emitTraceEvent bool) int64 { } unlock(&sched.lock) + worldStarted() + for p1 != nil { p := p1 p1 = p1.link.ptr() @@ -4539,6 +4551,7 @@ func (pp *p) init(id int32) { // sched.lock must be held and the world must be stopped. func (pp *p) destroy() { assertLockHeld(&sched.lock) + assertWorldStopped() // Move all runnable goroutines to the global queue for pp.runqhead != pp.runqtail { @@ -4629,6 +4642,7 @@ func (pp *p) destroy() { // Returns list of Ps with local work, they need to be scheduled by the caller. func procresize(nprocs int32) *p { assertLockHeld(&sched.lock) + assertWorldStopped() old := gomaxprocs if old < 0 || nprocs <= 0 { -- cgit v1.3 From 9393b5bae5944acebed3ab6f995926b7de3ce429 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 21 Aug 2020 11:59:55 -0400 Subject: runtime: add heap lock assertions Some functions that required holding the heap lock _or_ world stop have been simplified to simply requiring the heap lock. This is conceptually simpler and taking the heap lock during world stop is guaranteed to not contend. This was only done on functions already called on the systemstack to avoid too many extra systemstack calls in GC. 
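As a rough, standalone sketch of the fail-fast pattern these assertion changes add (an atomic counter tracking world-stop state, checked on entry to functions that rely on it): the function names and the flushAllCaches stand-in below are illustrative only, not the runtime's, and the real assertLockHeld additionally relies on the lock-ranking bookkeeping, which has no simple standalone analogue.

package main

import (
	"fmt"
	"sync/atomic"
)

// worldIsStopped mirrors the counter pattern from the patch: 1 == stopped.
var worldIsStopped uint32

func worldStop() {
	if n := atomic.AddUint32(&worldIsStopped, 1); n != 1 {
		panic(fmt.Sprintf("recursive world stop, count=%d", n))
	}
}

func worldStart() {
	if n := atomic.AddUint32(&worldIsStopped, ^uint32(0)); n != 0 { // decrement
		panic(fmt.Sprintf("released non-stopped world, count=%d", n))
	}
}

func assertWorldStopped() {
	if atomic.LoadUint32(&worldIsStopped) != 1 {
		panic("world not stopped")
	}
}

// flushAllCaches stands in for any function that is only safe to call while
// the world is stopped (updatememstats, flushallmcaches, etc. in the patch).
func flushAllCaches() {
	assertWorldStopped()
	fmt.Println("flushing caches with the world stopped")
}

func main() {
	worldStop()
	flushAllCaches()
	worldStart()
	// Calling flushAllCaches() here would panic; the assertion turns a
	// subtle misuse into an immediate, diagnosable failure.
}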
Updates #40677 Change-Id: I15aa1dadcdd1a81aac3d2a9ecad6e7d0377befdc Reviewed-on: https://go-review.googlesource.com/c/go/+/250262 Run-TryBot: Michael Pratt TryBot-Result: Go Bot Reviewed-by: Austin Clements Trust: Michael Pratt --- src/runtime/export_test.go | 61 +++++++++++++++++++++++++++++++++++++++++----- src/runtime/malloc.go | 2 ++ src/runtime/mgc.go | 4 +++ src/runtime/mgcscavenge.go | 18 ++++++++++++++ src/runtime/mheap.go | 29 +++++++++++++++++----- src/runtime/mpagealloc.go | 22 +++++++++++++++++ src/runtime/mpagecache.go | 14 ++++++++++- src/runtime/proc.go | 2 ++ 8 files changed, 139 insertions(+), 13 deletions(-) (limited to 'src/runtime/mgc.go') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 4ca0420d2a..44551dcaf1 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -743,7 +743,16 @@ func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) { return (*pageCache)(c).alloc(npages) } func (c *PageCache) Flush(s *PageAlloc) { - (*pageCache)(c).flush((*pageAlloc)(s)) + cp := (*pageCache)(c) + sp := (*pageAlloc)(s) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(sp.mheapLock) + cp.flush(sp) + unlock(sp.mheapLock) + }) } // Expose chunk index type. @@ -754,13 +763,41 @@ type ChunkIdx chunkIdx type PageAlloc pageAlloc func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) { - return (*pageAlloc)(p).alloc(npages) + pp := (*pageAlloc)(p) + + var addr, scav uintptr + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + addr, scav = pp.alloc(npages) + unlock(pp.mheapLock) + }) + return addr, scav } func (p *PageAlloc) AllocToCache() PageCache { - return PageCache((*pageAlloc)(p).allocToCache()) + pp := (*pageAlloc)(p) + + var c PageCache + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + c = PageCache(pp.allocToCache()) + unlock(pp.mheapLock) + }) + return c } func (p *PageAlloc) Free(base, npages uintptr) { - (*pageAlloc)(p).free(base, npages) + pp := (*pageAlloc)(p) + + systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. + lock(pp.mheapLock) + pp.free(base, npages) + unlock(pp.mheapLock) + }) } func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end) @@ -768,6 +805,8 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) { func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) { pp := (*pageAlloc)(p) systemstack(func() { + // None of the tests need any higher-level locking, so we just + // take the lock internally. lock(pp.mheapLock) r = pp.scavenge(nbytes, mayUnlock) unlock(pp.mheapLock) @@ -926,7 +965,11 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { addr := chunkBase(chunkIdx(i)) // Mark the chunk's existence in the pageAlloc. - p.grow(addr, pallocChunkBytes) + systemstack(func() { + lock(p.mheapLock) + p.grow(addr, pallocChunkBytes) + unlock(p.mheapLock) + }) // Initialize the bitmap and update pageAlloc metadata. chunk := p.chunkOf(chunkIndex(addr)) @@ -957,13 +1000,19 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc { } // Update heap metadata for the allocRange calls above. 
- p.update(addr, pallocChunkPages, false, false) + systemstack(func() { + lock(p.mheapLock) + p.update(addr, pallocChunkPages, false, false) + unlock(p.mheapLock) + }) } + systemstack(func() { lock(p.mheapLock) p.scavengeStartGen() unlock(p.mheapLock) }) + return (*PageAlloc)(p) } diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 0563f49d17..4b798d129c 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -627,6 +627,8 @@ func mallocinit() { // // h must be locked. func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) { + assertLockHeld(&h.lock) + n = alignUp(n, heapArenaBytes) // First, try the arena pre-reservation. diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index fb3c149942..185d3201ca 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -821,6 +821,8 @@ func pollFractionalWorkerExit() bool { // // mheap_.lock must be held or the world must be stopped. func gcSetTriggerRatio(triggerRatio float64) { + assertWorldStoppedOrLockHeld(&mheap_.lock) + // Compute the next GC goal, which is when the allocated heap // has grown by GOGC/100 over the heap marked by the last // cycle. @@ -960,6 +962,8 @@ func gcSetTriggerRatio(triggerRatio float64) { // // mheap_.lock must be held or the world must be stopped. func gcEffectiveGrowthRatio() float64 { + assertWorldStoppedOrLockHeld(&mheap_.lock) + egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked) if egogc < 0 { // Shouldn't happen, but just in case. diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 5843ada981..a242577bd9 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -397,6 +397,8 @@ func bgscavenge(c chan int) { // //go:systemstack func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr { + assertLockHeld(p.mheapLock) + var ( addrs addrRange gen uint32 @@ -446,6 +448,8 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { // //go:systemstack func (p *pageAlloc) scavengeStartGen() { + assertLockHeld(p.mheapLock) + if debug.scavtrace > 0 { printScavTrace(p.scav.gen, p.scav.released, false) } @@ -495,6 +499,8 @@ func (p *pageAlloc) scavengeStartGen() { // //go:systemstack func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { + assertLockHeld(p.mheapLock) + // Start by reserving the minimum. r := p.scav.inUse.removeLast(p.scav.reservationBytes) @@ -525,6 +531,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) { // //go:systemstack func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { + assertLockHeld(p.mheapLock) + if r.size() == 0 || gen != p.scav.gen { return } @@ -552,6 +560,8 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) { // //go:systemstack func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) { + assertLockHeld(p.mheapLock) + // Defensively check if we've recieved an empty address range. // If so, just return. if work.size() == 0 { @@ -610,6 +620,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // If we found something, scavenge it and return! if npages != 0 { work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return. 
return uintptr(npages) * pageSize, work } } @@ -674,12 +686,16 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages) if npages > 0 { work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)} + + assertLockHeld(p.mheapLock) // Must be locked on return. return uintptr(npages) * pageSize, work } // We were fooled, so let's continue from where we left off. work.limit = offAddr{chunkBase(candidateChunkIdx)} } + + assertLockHeld(p.mheapLock) // Must be locked on return. return 0, work } @@ -692,6 +708,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui // // p.mheapLock must be held. func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr { + assertLockHeld(p.mheapLock) + p.chunkOf(ci).scavenged.setRange(base, npages) // Compute the full address for the start of the range. diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 14a73c0491..66a59cb999 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -483,10 +483,15 @@ func (s *mspan) layout() (size, n, total uintptr) { // indirect call from the fixalloc initializer, the compiler can't see // this. // +// The heap lock must be held. +// //go:nowritebarrierrec func recordspan(vh unsafe.Pointer, p unsafe.Pointer) { h := (*mheap)(vh) s := (*mspan)(p) + + assertLockHeld(&h.lock) + if len(h.allspans) >= cap(h.allspans) { n := 64 * 1024 / sys.PtrSize if n < cap(h.allspans)*3/2 { @@ -721,7 +726,7 @@ func (h *mheap) init() { // // reclaim implements the page-reclaimer half of the sweeper. // -// h must NOT be locked. +// h.lock must NOT be held. func (h *mheap) reclaim(npage uintptr) { // TODO(austin): Half of the time spent freeing spans is in // locking/unlocking the heap (even with low contention). We @@ -804,6 +809,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { // In particular, if a span were freed and merged concurrently // with this probing heapArena.spans, it would be possible to // observe arbitrary, stale span pointers. + assertLockHeld(&h.lock) + n0 := n var nFreed uintptr sg := h.sweepgen @@ -858,6 +865,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr { traceGCSweepSpan((n0 - nFreed) * pageSize) lock(&h.lock) } + + assertLockHeld(&h.lock) // Must be locked on return. return nFreed } @@ -1011,7 +1020,7 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) { // tryAllocMSpan attempts to allocate an mspan object from // the P-local cache, but may fail. // -// h need not be locked. +// h.lock need not be held. // // This caller must ensure that its P won't change underneath // it during this function. Currently to ensure that we enforce @@ -1035,7 +1044,7 @@ func (h *mheap) tryAllocMSpan() *mspan { // allocMSpanLocked allocates an mspan object. // -// h must be locked. +// h.lock must be held. // // allocMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1044,6 +1053,8 @@ func (h *mheap) tryAllocMSpan() *mspan { // //go:systemstack func (h *mheap) allocMSpanLocked() *mspan { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() if pp == nil { // We don't have a p so just do the normal thing. @@ -1065,7 +1076,7 @@ func (h *mheap) allocMSpanLocked() *mspan { // freeMSpanLocked free an mspan object. // -// h must be locked. +// h.lock must be held. 
// // freeMSpanLocked must be called on the system stack because // its caller holds the heap lock. See mheap for details. @@ -1074,6 +1085,8 @@ func (h *mheap) allocMSpanLocked() *mspan { // //go:systemstack func (h *mheap) freeMSpanLocked(s *mspan) { + assertLockHeld(&h.lock) + pp := getg().m.p.ptr() // First try to free the mspan directly to the cache. if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) { @@ -1097,7 +1110,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) { // // The returned span is fully initialized. // -// h must not be locked. +// h.lock must not be held. // // allocSpan must be called on the system stack both because it acquires // the heap lock and because it must block GC transitions. @@ -1281,8 +1294,10 @@ HaveSpan: // Try to add at least npage pages of memory to the heap, // returning whether it worked. // -// h must be locked. +// h.lock must be held. func (h *mheap) grow(npage uintptr) bool { + assertLockHeld(&h.lock) + // We must grow the heap in whole palloc chunks. ask := alignUp(npage, pallocChunkPages) * pageSize @@ -1391,6 +1406,8 @@ func (h *mheap) freeManual(s *mspan, typ spanAllocType) { } func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) { + assertLockHeld(&h.lock) + switch s.state.get() { case mSpanManual: if s.allocCount != 0 { diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 2af1c97e0b..dac1f39969 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -349,6 +349,8 @@ func (p *pageAlloc) chunkOf(ci chunkIdx) *pallocData { // // p.mheapLock must be held. func (p *pageAlloc) grow(base, size uintptr) { + assertLockHeld(p.mheapLock) + // Round up to chunks, since we can't deal with increments smaller // than chunks. Also, sysGrow expects aligned values. limit := alignUp(base+size, pallocChunkBytes) @@ -413,6 +415,8 @@ func (p *pageAlloc) grow(base, size uintptr) { // // p.mheapLock must be held. func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { + assertLockHeld(p.mheapLock) + // base, limit, start, and end are inclusive. limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) @@ -499,6 +503,8 @@ func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // // p.mheapLock must be held. func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { + assertLockHeld(p.mheapLock) + limit := base + npages*pageSize - 1 sc, ec := chunkIndex(base), chunkIndex(limit) si, ei := chunkPageIndex(base), chunkPageIndex(limit) @@ -534,6 +540,8 @@ func (p *pageAlloc) allocRange(base, npages uintptr) uintptr { // // p.mheapLock must be held. func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { + assertLockHeld(p.mheapLock) + // If we're not in a test, validate first by checking mheap_.arenas. // This is a fast path which is only safe to use outside of testing. ai := arenaIndex(addr.addr()) @@ -568,6 +576,8 @@ func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr { // // p.mheapLock must be held. func (p *pageAlloc) find(npages uintptr) (uintptr, offAddr) { + assertLockHeld(p.mheapLock) + // Search algorithm. // // This algorithm walks each level l of the radix tree from the root level @@ -786,7 +796,13 @@ nextLevel: // should be ignored. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. 
+// +//go:systemstack func (p *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { @@ -841,7 +857,13 @@ Found: // free returns npages worth of memory starting at base back to the page heap. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) free(base, npages uintptr) { + assertLockHeld(p.mheapLock) + // If we're freeing pages below the p.searchAddr, update searchAddr. if b := (offAddr{base}); b.lessThan(p.searchAddr) { p.searchAddr = b diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go index 5f76501a1c..4b5c66d8d6 100644 --- a/src/runtime/mpagecache.go +++ b/src/runtime/mpagecache.go @@ -71,8 +71,14 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) { // into s. Then, it clears the cache, such that empty returns // true. // -// p.mheapLock must be held or the world must be stopped. +// p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (c *pageCache) flush(p *pageAlloc) { + assertLockHeld(p.mheapLock) + if c.empty() { return } @@ -103,7 +109,13 @@ func (c *pageCache) flush(p *pageAlloc) { // chunk. // // p.mheapLock must be held. +// +// Must run on the system stack because p.mheapLock must be held. +// +//go:systemstack func (p *pageAlloc) allocToCache() pageCache { + assertLockHeld(p.mheapLock) + // If the searchAddr refers to a region which has a higher address than // any known chunk, then we know we're out of memory. if chunkIndex(p.searchAddr.addr()) >= p.end { diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 82284e6cd6..ced27ceb3a 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -4603,7 +4603,9 @@ func (pp *p) destroy() { mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i])) } pp.mspancache.len = 0 + lock(&mheap_.lock) pp.pcache.flush(&mheap_.pages) + unlock(&mheap_.lock) }) freemcache(pp.mcache) pp.mcache = nil -- cgit v1.3
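For reference, the GC pause distribution exported earlier in this series can be observed from user code through the runtime/metrics package (Go 1.16 and later). A minimal sketch, assuming the "/gc/pauses:seconds" metric is available on the running toolchain:

package main

import (
	"fmt"
	"runtime"
	"runtime/metrics"
)

func main() {
	// Run a couple of GC cycles so the pause histogram has data in it.
	runtime.GC()
	runtime.GC()

	samples := []metrics.Sample{{Name: "/gc/pauses:seconds"}}
	metrics.Read(samples)

	if samples[0].Value.Kind() != metrics.KindFloat64Histogram {
		fmt.Println("/gc/pauses:seconds is not supported on this Go version")
		return
	}
	h := samples[0].Value.Float64Histogram()

	var pauses uint64
	for _, c := range h.Counts {
		pauses += c
	}
	// Each GC cycle contributes at least two pauses (sweep termination and
	// mark termination), which is what the updated test above checks.
	fmt.Println("individual GC pauses observed:", pauses)
}

Summing the histogram counts gives the total number of individual pauses, which the updated TestReadMetricsConsistency compares against twice the GC cycle count.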