From 34835df04891a1d54394888b763af88f9476101d Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 9 Sep 2020 16:52:18 +0000
Subject: runtime: fix ReadMemStatsSlow's and CheckScavengedBits' chunk
 iteration

Both ReadMemStatsSlow and CheckScavengedBits iterate over the page
allocator's chunks but don't actually check if they exist. During the
development process the chunks index became sparse, so now this was a
possibility. If the runtime tests' heap is sparse we might end up
segfaulting in either one of these functions, though this will generally
be very rare.

The pattern here to return nil for a nonexistent chunk is also useful
elsewhere, so this change introduces tryChunkOf which won't throw, but
might return nil. It also updates the documentation of chunkOf.

Fixes #41296.

Change-Id: Id5ae0ca3234480de1724fdf2e3677eeedcf76fa0
Reviewed-on: https://go-review.googlesource.com/c/go/+/253777
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/runtime/mpagealloc.go | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'src/runtime/mpagealloc.go')

diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 8b3c62c375..c90a6378bd 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -326,7 +326,20 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
 	s.scav.scavLWM = maxSearchAddr
 }
 
+// tryChunkOf returns the bitmap data for the given chunk.
+//
+// Returns nil if the chunk data has not been mapped.
+func (s *pageAlloc) tryChunkOf(ci chunkIdx) *pallocData {
+	l2 := s.chunks[ci.l1()]
+	if l2 == nil {
+		return nil
+	}
+	return &l2[ci.l2()]
+}
+
 // chunkOf returns the chunk at the given chunk index.
+//
+// The chunk index must be valid or this method may throw.
 func (s *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
 	return &s.chunks[ci.l1()][ci.l2()]
 }
-- 
cgit v1.3


From ad642727247383079c8546ca365172859641a800 Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Tue, 25 Aug 2020 12:34:02 -0400
Subject: runtime: rename pageAlloc receiver

The history of pageAlloc using 's' as a receiver are lost to the depths
of time (perhaps it used to be called summary?), but it doesn't make
much sense anymore. Rename it to 'p'.

Generated with:

$ cd src/runtime
$ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ gorename -offset {} -to p -v
$ grep -R -b "func (s \*pageAlloc" . | awk -F : '{ print $1 ":#" $2+6 }' | xargs -n 1 -I {} env GOROOT=$(pwd)/../../ GOARCH=386 gorename -offset {} -to p -v
$ GOROOT=$(pwd)/../../ gorename -offset mpagecache.go:#2397 -to p -v

($2+6 to advance past "func (".)

Plus manual comment fixups.

Change-Id: I2d521a1cbf6ebe2ef6aae92e654bfc33c63d1aa9
Reviewed-on: https://go-review.googlesource.com/c/go/+/250517
Trust: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
---
 src/runtime/mgcscavenge.go      | 102 ++++++++++----------
 src/runtime/mpagealloc.go       | 200 ++++++++++++++++++++--------------------
 src/runtime/mpagealloc_32bit.go |  14 +--
 src/runtime/mpagealloc_64bit.go |  30 +++---
 src/runtime/mpagecache.go       |  42 ++++-----
 5 files changed, 194 insertions(+), 194 deletions(-)

(limited to 'src/runtime/mpagealloc.go')

diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index 9d6f551768..34646828e5 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -390,13 +390,13 @@ func bgscavenge(c chan int) {
 //
 // Returns the amount of memory scavenged in bytes.
 //
-// s.mheapLock must be held, but may be temporarily released if
+// p.mheapLock must be held, but may be temporarily released if
 // mayUnlock == true.
 //
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
 //
 //go:systemstack
-func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
+func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
 	var (
 		addrs addrRange
 		gen   uint32
@@ -404,17 +404,17 @@ func (s *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
 	released := uintptr(0)
 	for released < nbytes {
 		if addrs.size() == 0 {
-			if addrs, gen = s.scavengeReserve(); addrs.size() == 0 {
+			if addrs, gen = p.scavengeReserve(); addrs.size() == 0 {
 				break
 			}
 		}
-		r, a := s.scavengeOne(addrs, nbytes-released, mayUnlock)
+		r, a := p.scavengeOne(addrs, nbytes-released, mayUnlock)
 		released += r
 		addrs = a
 	}
 	// Only unreserve the space which hasn't been scavenged or searched
 	// to ensure we always make progress.
-	s.scavengeUnreserve(addrs, gen)
+	p.scavengeUnreserve(addrs, gen)
 	return released
 }
 
@@ -440,46 +440,46 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
 // scavengeStartGen starts a new scavenge generation, resetting
 // the scavenger's search space to the full in-use address space.
 //
-// s.mheapLock must be held.
+// p.mheapLock must be held.
 //
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
 //
 //go:systemstack
-func (s *pageAlloc) scavengeStartGen() {
+func (p *pageAlloc) scavengeStartGen() {
 	if debug.scavtrace > 0 {
-		printScavTrace(s.scav.gen, s.scav.released, false)
+		printScavTrace(p.scav.gen, p.scav.released, false)
 	}
-	s.inUse.cloneInto(&s.scav.inUse)
+	p.inUse.cloneInto(&p.scav.inUse)
 
 	// Pick the new starting address for the scavenger cycle.
 	var startAddr offAddr
-	if s.scav.scavLWM.lessThan(s.scav.freeHWM) {
+	if p.scav.scavLWM.lessThan(p.scav.freeHWM) {
 		// The "free" high watermark exceeds the "scavenged" low watermark,
 		// so there are free scavengable pages in parts of the address space
 		// that the scavenger already searched, the high watermark being the
 		// highest one. Pick that as our new starting point to ensure we
 		// see those pages.
-		startAddr = s.scav.freeHWM
+		startAddr = p.scav.freeHWM
 	} else {
 		// The "free" high watermark does not exceed the "scavenged" low
 		// watermark. This means the allocator didn't free any memory in
 		// the range we scavenged last cycle, so we might as well continue
 		// scavenging from where we were.
-		startAddr = s.scav.scavLWM
+		startAddr = p.scav.scavLWM
 	}
-	s.scav.inUse.removeGreaterEqual(startAddr.addr())
+	p.scav.inUse.removeGreaterEqual(startAddr.addr())
 
-	// reservationBytes may be zero if s.inUse.totalBytes is small, or if
+	// reservationBytes may be zero if p.inUse.totalBytes is small, or if
 	// scavengeReservationShards is large. This case is fine as the scavenger
 	// will simply be turned off, but it does mean that scavengeReservationShards,
 	// in concert with pallocChunkBytes, dictates the minimum heap size at which
 	// the scavenger triggers. In practice this minimum is generally less than an
 	// arena in size, so virtually every heap has the scavenger on.
-	s.scav.reservationBytes = alignUp(s.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
-	s.scav.gen++
-	s.scav.released = 0
-	s.scav.freeHWM = minOffAddr
-	s.scav.scavLWM = maxOffAddr
+	p.scav.reservationBytes = alignUp(p.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
+	p.scav.gen++
+	p.scav.released = 0
+	p.scav.freeHWM = minOffAddr
+	p.scav.scavLWM = maxOffAddr
 }
 
 // scavengeReserve reserves a contiguous range of the address space
@@ -489,19 +489,19 @@ func (s *pageAlloc) scavengeStartGen() {
 //
 // Returns the reserved range and the scavenge generation number for it.
 //
-// s.mheapLock must be held.
+// p.mheapLock must be held.
 //
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
 //
 //go:systemstack
-func (s *pageAlloc) scavengeReserve() (addrRange, uint32) {
+func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
 	// Start by reserving the minimum.
-	r := s.scav.inUse.removeLast(s.scav.reservationBytes)
+	r := p.scav.inUse.removeLast(p.scav.reservationBytes)
 
 	// Return early if the size is zero; we don't want to use
 	// the bogus address below.
 	if r.size() == 0 {
-		return r, s.scav.gen
+		return r, p.scav.gen
 	}
 
 	// The scavenger requires that base be aligned to a
@@ -511,27 +511,27 @@ func (s *pageAlloc) scavengeReserve() (addrRange, uint32) {
 	newBase := alignDown(r.base.addr(), pallocChunkBytes)
 
 	// Remove from inUse however much extra we just pulled out.
-	s.scav.inUse.removeGreaterEqual(newBase)
+	p.scav.inUse.removeGreaterEqual(newBase)
 	r.base = offAddr{newBase}
-	return r, s.scav.gen
+	return r, p.scav.gen
 }
 
 // scavengeUnreserve returns an unscavenged portion of a range that was
 // previously reserved with scavengeReserve.
 //
-// s.mheapLock must be held.
+// p.mheapLock must be held.
 //
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
 //
 //go:systemstack
-func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
-	if r.size() == 0 || gen != s.scav.gen {
+func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
+	if r.size() == 0 || gen != p.scav.gen {
 		return
 	}
 	if r.base.addr()%pallocChunkBytes != 0 {
 		throw("unreserving unaligned region")
 	}
-	s.scav.inUse.add(r)
+	p.scav.inUse.add(r)
 }
 
 // scavengeOne walks over address range work until it finds
@@ -545,13 +545,13 @@ func (s *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
 //
 // work's base address must be aligned to pallocChunkBytes.
 //
-// s.mheapLock must be held, but may be temporarily released if
+// p.mheapLock must be held, but may be temporarily released if
 // mayUnlock == true.
 //
-// Must run on the system stack because s.mheapLock must be held.
+// Must run on the system stack because p.mheapLock must be held.
 //
 //go:systemstack
-func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
+func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
 	// Defensively check if we've recieved an empty address range.
 	// If so, just return.
 	if work.size() == 0 {
@@ -586,12 +586,12 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 	// Helpers for locking and unlocking only if mayUnlock == true.
 	lockHeap := func() {
 		if mayUnlock {
-			lock(s.mheapLock)
+			lock(p.mheapLock)
 		}
 	}
 	unlockHeap := func() {
 		if mayUnlock {
-			unlock(s.mheapLock)
+			unlock(p.mheapLock)
 		}
 	}
 
@@ -602,14 +602,14 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 	// by subtracting 1.
 	maxAddr := work.limit.addr() - 1
 	maxChunk := chunkIndex(maxAddr)
-	if s.summary[len(s.summary)-1][maxChunk].max() >= uint(minPages) {
+	if p.summary[len(p.summary)-1][maxChunk].max() >= uint(minPages) {
 		// We only bother looking for a candidate if there at least
 		// minPages free pages at all.
-		base, npages := s.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
+		base, npages := p.chunkOf(maxChunk).findScavengeCandidate(chunkPageIndex(maxAddr), minPages, maxPages)
 
 		// If we found something, scavenge it and return!
 		if npages != 0 {
-			work.limit = offAddr{s.scavengeRangeLocked(maxChunk, base, npages)}
+			work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
 			return uintptr(npages) * pageSize, work
 		}
 	}
@@ -631,7 +631,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 			// that's fine. We're being optimistic anyway.
 
 			// Check quickly if there are enough free pages at all.
-			if s.summary[len(s.summary)-1][i].max() < uint(minPages) {
+			if p.summary[len(p.summary)-1][i].max() < uint(minPages) {
 				continue
 			}
 
@@ -641,7 +641,7 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 			// avoid races with heap growth. It may or may not be possible to also
 			// see a nil pointer in this case if we do race with heap growth, but
 			// just defensively ignore the nils. This operation is optimistic anyway.
-			l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&s.chunks[i.l1()])))
+			l2 := (*[1 << pallocChunksL2Bits]pallocData)(atomic.Loadp(unsafe.Pointer(&p.chunks[i.l1()])))
 			if l2 != nil && l2[i.l2()].hasScavengeCandidate(minPages) {
 				return i, true
 			}
@@ -670,10 +670,10 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 		}
 
 		// Find, verify, and scavenge if we can.
-		chunk := s.chunkOf(candidateChunkIdx)
+		chunk := p.chunkOf(candidateChunkIdx)
 		base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
 		if npages > 0 {
-			work.limit = offAddr{s.scavengeRangeLocked(candidateChunkIdx, base, npages)}
+			work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
 			return uintptr(npages) * pageSize, work
 		}
 
@@ -690,21 +690,21 @@ func (s *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 //
 // Returns the base address of the scavenged region.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
-	s.chunkOf(ci).scavenged.setRange(base, npages)
+// p.mheapLock must be held.
+func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
+	p.chunkOf(ci).scavenged.setRange(base, npages)
 
 	// Compute the full address for the start of the range.
 	addr := chunkBase(ci) + uintptr(base)*pageSize
 
 	// Update the scavenge low watermark.
-	if oAddr := (offAddr{addr}); oAddr.lessThan(s.scav.scavLWM) {
-		s.scav.scavLWM = oAddr
+	if oAddr := (offAddr{addr}); oAddr.lessThan(p.scav.scavLWM) {
+		p.scav.scavLWM = oAddr
 	}
 
 	// Only perform the actual scavenging if we're not in a test.
 	// It's dangerous to do so otherwise.
-	if s.test {
+	if p.test {
 		return addr
 	}
 	sysUnused(unsafe.Pointer(addr), uintptr(npages)*pageSize)
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index c90a6378bd..560babed03 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -299,7 +299,7 @@ type pageAlloc struct {
 	test bool
 }
 
-func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
+func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
 	if levelLogPages[0] > logMaxPackedValue {
 		// We can't represent 1<<levelLogPages[0] pages, the maximum number
 		// of pages we need to represent at the root level, in a summary, which
@@ -308,29 +308,29 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
 		print("runtime: summary max pages = ", maxPackedValue, "\n")
 		throw("root level max pages doesn't fit in summary")
 	}
-	s.sysStat = sysStat
+	p.sysStat = sysStat
 
-	// Initialize s.inUse.
-	s.inUse.init(sysStat)
+	// Initialize p.inUse.
+	p.inUse.init(sysStat)
 
 	// System-dependent initialization.
-	s.sysInit()
+	p.sysInit()
 
 	// Start with the searchAddr in a state indicating there's no free memory.
-	s.searchAddr = maxSearchAddr
+	p.searchAddr = maxSearchAddr
 
 	// Set the mheapLock.
-	s.mheapLock = mheapLock
+	p.mheapLock = mheapLock
 
 	// Initialize scavenge tracking state.
-	s.scav.scavLWM = maxSearchAddr
+	p.scav.scavLWM = maxSearchAddr
 }
 
 // tryChunkOf returns the bitmap data for the given chunk.
 //
 // Returns nil if the chunk data has not been mapped.
-func (s *pageAlloc) tryChunkOf(ci chunkIdx) *pallocData {
-	l2 := s.chunks[ci.l1()]
+func (p *pageAlloc) tryChunkOf(ci chunkIdx) *pallocData {
+	l2 := p.chunks[ci.l1()]
 	if l2 == nil {
 		return nil
 	}
@@ -340,15 +340,15 @@ func (s *pageAlloc) tryChunkOf(ci chunkIdx) *pallocData {
 // chunkOf returns the chunk at the given chunk index.
 //
 // The chunk index must be valid or this method may throw.
-func (s *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
-	return &s.chunks[ci.l1()][ci.l2()]
+func (p *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
+	return &p.chunks[ci.l1()][ci.l2()]
 }
 
 // grow sets up the metadata for the address range [base, base+size).
-// It may allocate metadata, in which case *s.sysStat will be updated.
+// It may allocate metadata, in which case *p.sysStat will be updated.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) grow(base, size uintptr) {
+// p.mheapLock must be held.
+func (p *pageAlloc) grow(base, size uintptr) {
 	// Round up to chunks, since we can't deal with increments smaller
 	// than chunks. Also, sysGrow expects aligned values.
 	limit := alignUp(base+size, pallocChunkBytes)
@@ -356,29 +356,29 @@ func (s *pageAlloc) grow(base, size uintptr) {
 
 	// Grow the summary levels in a system-dependent manner.
 	// We just update a bunch of additional metadata here.
-	s.sysGrow(base, limit)
+	p.sysGrow(base, limit)
 
-	// Update s.start and s.end.
+	// Update p.start and p.end.
 	// If no growth happened yet, start == 0. This is generally
 	// safe since the zero page is unmapped.
-	firstGrowth := s.start == 0
+	firstGrowth := p.start == 0
 	start, end := chunkIndex(base), chunkIndex(limit)
-	if firstGrowth || start < s.start {
-		s.start = start
+	if firstGrowth || start < p.start {
+		p.start = start
 	}
-	if end > s.end {
-		s.end = end
+	if end > p.end {
+		p.end = end
 	}
 	// Note that [base, limit) will never overlap with any existing
 	// range inUse because grow only ever adds never-used memory
 	// regions to the page allocator.
-	s.inUse.add(makeAddrRange(base, limit))
+	p.inUse.add(makeAddrRange(base, limit))
 
 	// A grow operation is a lot like a free operation, so if our
-	// chunk ends up below s.searchAddr, update s.searchAddr to the
+	// chunk ends up below p.searchAddr, update p.searchAddr to the
 	// new address, just like in free.
-	if b := (offAddr{base}); b.lessThan(s.searchAddr) {
-		s.searchAddr = b
+	if b := (offAddr{base}); b.lessThan(p.searchAddr) {
+		p.searchAddr = b
 	}
 
 	// Add entries into chunks, which is sparse, if needed. Then,
@@ -387,21 +387,21 @@ func (s *pageAlloc) grow(base, size uintptr) {
 	// Newly-grown memory is always considered scavenged.
 	// Set all the bits in the scavenged bitmaps high.
 	for c := chunkIndex(base); c < chunkIndex(limit); c++ {
-		if s.chunks[c.l1()] == nil {
+		if p.chunks[c.l1()] == nil {
 			// Create the necessary l2 entry.
 			//
 			// Store it atomically to avoid races with readers which
 			// don't acquire the heap lock.
-			r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat)
-			atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r)
+			r := sysAlloc(unsafe.Sizeof(*p.chunks[0]), p.sysStat)
+			atomic.StorepNoWB(unsafe.Pointer(&p.chunks[c.l1()]), r)
 		}
-		s.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
+		p.chunkOf(c).scavenged.setRange(0, pallocChunkPages)
 	}
 
 	// Update summaries accordingly. The grow acts like a free, so
 	// we need to ensure this newly-free memory is visible in the
 	// summaries.
-	s.update(base, size/pageSize, true, false)
+	p.update(base, size/pageSize, true, false)
 }
 
 // update updates heap metadata. It must be called each time the bitmap
@@ -411,8 +411,8 @@ func (s *pageAlloc) grow(base, size uintptr) {
 // a contiguous allocation or free between addr and addr+npages. alloc indicates
 // whether the operation performed was an allocation or a free.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
+// p.mheapLock must be held.
+func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 	// base, limit, start, and end are inclusive.
 	limit := base + npages*pageSize - 1
 	sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -421,23 +421,23 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 	if sc == ec {
 		// Fast path: the allocation doesn't span more than one chunk,
 		// so update this one and if the summary didn't change, return.
-		x := s.summary[len(s.summary)-1][sc]
-		y := s.chunkOf(sc).summarize()
+		x := p.summary[len(p.summary)-1][sc]
+		y := p.chunkOf(sc).summarize()
 		if x == y {
 			return
 		}
-		s.summary[len(s.summary)-1][sc] = y
+		p.summary[len(p.summary)-1][sc] = y
 	} else if contig {
 		// Slow contiguous path: the allocation spans more than one chunk
 		// and at least one summary is guaranteed to change.
-		summary := s.summary[len(s.summary)-1]
+		summary := p.summary[len(p.summary)-1]
 
 		// Update the summary for chunk sc.
-		summary[sc] = s.chunkOf(sc).summarize()
+		summary[sc] = p.chunkOf(sc).summarize()
 
 		// Update the summaries for chunks in between, which are
 		// either totally allocated or freed.
-		whole := s.summary[len(s.summary)-1][sc+1 : ec]
+		whole := p.summary[len(p.summary)-1][sc+1 : ec]
 		if alloc {
 			// Should optimize into a memclr.
 			for i := range whole {
@@ -450,22 +450,22 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 		}
 
 		// Update the summary for chunk ec.
-		summary[ec] = s.chunkOf(ec).summarize()
+		summary[ec] = p.chunkOf(ec).summarize()
 	} else {
 		// Slow general path: the allocation spans more than one chunk
 		// and at least one summary is guaranteed to change.
 		//
 		// We can't assume a contiguous allocation happened, so walk over
 		// every chunk in the range and manually recompute the summary.
-		summary := s.summary[len(s.summary)-1]
+		summary := p.summary[len(p.summary)-1]
 		for c := sc; c <= ec; c++ {
-			summary[c] = s.chunkOf(c).summarize()
+			summary[c] = p.chunkOf(c).summarize()
 		}
 	}
 
 	// Walk up the radix tree and update the summaries appropriately.
 	changed := true
-	for l := len(s.summary) - 2; l >= 0 && changed; l-- {
+	for l := len(p.summary) - 2; l >= 0 && changed; l-- {
 		// Update summaries at level l from summaries at level l+1.
 		changed = false
 
@@ -479,12 +479,12 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 
 		// Iterate over each block, updating the corresponding summary in the less-granular level.
 		for i := lo; i < hi; i++ {
-			children := s.summary[l+1][i<<logEntriesPerBlock : (i+1)<<logEntriesPerBlock]
+			children := p.summary[l+1][i<<logEntriesPerBlock : (i+1)<<logEntriesPerBlock]
 			sum := mergeSummaries(children, logMaxPages)
-			old := s.summary[l][i]
+			old := p.summary[l][i]
 			if old != sum {
 				changed = true
-				s.summary[l][i] = sum
+				p.summary[l][i] = sum
 			}
 		}
 	}
@@ -497,8 +497,8 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 // Returns the amount of scavenged memory in bytes present in the
 // allocated range.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) allocRange(base, npages uintptr) uintptr {
+// p.mheapLock must be held.
+func (p *pageAlloc) allocRange(base, npages uintptr) uintptr {
 	limit := base + npages*pageSize - 1
 	sc, ec := chunkIndex(base), chunkIndex(limit)
 	si, ei := chunkPageIndex(base), chunkPageIndex(limit)
@@ -506,24 +506,24 @@ func (s *pageAlloc) allocRange(base, npages uintptr) uintptr {
 	scav := uint(0)
 	if sc == ec {
 		// The range doesn't cross any chunk boundaries.
-		chunk := s.chunkOf(sc)
+		chunk := p.chunkOf(sc)
 		scav += chunk.scavenged.popcntRange(si, ei+1-si)
 		chunk.allocRange(si, ei+1-si)
 	} else {
 		// The range crosses at least one chunk boundary.
-		chunk := s.chunkOf(sc)
+		chunk := p.chunkOf(sc)
 		scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si)
 		chunk.allocRange(si, pallocChunkPages-si)
 		for c := sc + 1; c < ec; c++ {
-			chunk := s.chunkOf(c)
+			chunk := p.chunkOf(c)
 			scav += chunk.scavenged.popcntRange(0, pallocChunkPages)
 			chunk.allocAll()
 		}
-		chunk = s.chunkOf(ec)
+		chunk = p.chunkOf(ec)
 		scav += chunk.scavenged.popcntRange(0, ei+1)
 		chunk.allocRange(0, ei+1)
 	}
-	s.update(base, npages, true, true)
+	p.update(base, npages, true, true)
 	return uintptr(scav) * pageSize
 }
 
@@ -532,13 +532,13 @@ func (s *pageAlloc) allocRange(base, npages uintptr) uintptr {
 // returned. If addr is higher than any mapped region, then
 // it returns maxOffAddr.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) findMappedAddr(addr offAddr) offAddr {
+// p.mheapLock must be held.
+func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr {
 	// If we're not in a test, validate first by checking mheap_.arenas.
 	// This is a fast path which is only safe to use outside of testing.
 	ai := arenaIndex(addr.addr())
-	if s.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil {
-		vAddr, ok := s.inUse.findAddrGreaterEqual(addr.addr())
+	if p.test || mheap_.arenas[ai.l1()] == nil || mheap_.arenas[ai.l1()][ai.l2()] == nil {
+		vAddr, ok := p.inUse.findAddrGreaterEqual(addr.addr())
 		if ok {
 			return offAddr{vAddr}
 		} else {
@@ -554,20 +554,20 @@ func (s *pageAlloc) findMappedAddr(addr offAddr) offAddr {
 // find searches for the first (address-ordered) contiguous free region of
 // npages in size and returns a base address for that region.
 //
-// It uses s.searchAddr to prune its search and assumes that no palloc chunks
-// below chunkIndex(s.searchAddr) contain any free memory at all.
+// It uses p.searchAddr to prune its search and assumes that no palloc chunks
+// below chunkIndex(p.searchAddr) contain any free memory at all.
 //
-// find also computes and returns a candidate s.searchAddr, which may or
-// may not prune more of the address space than s.searchAddr already does.
-// This candidate is always a valid s.searchAddr.
+// find also computes and returns a candidate p.searchAddr, which may or
+// may not prune more of the address space than p.searchAddr already does.
+// This candidate is always a valid p.searchAddr.
 //
 // find represents the slow path and the full radix tree search.
 //
 // Returns a base address of 0 on failure, in which case the candidate
 // searchAddr returned is invalid and must be ignored.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) find(npages uintptr) (uintptr, offAddr) {
+// p.mheapLock must be held.
+func (p *pageAlloc) find(npages uintptr) (uintptr, offAddr) {
 	// Search algorithm.
 	//
 	// This algorithm walks each level l of the radix tree from the root level
@@ -647,7 +647,7 @@ func (s *pageAlloc) find(npages uintptr) (uintptr, offAddr) {
 	lastSumIdx := -1
 
 nextLevel:
-	for l := 0; l < len(s.summary); l++ {
+	for l := 0; l < len(p.summary); l++ {
 		// For the root level, entriesPerBlock is the whole level.
 		entriesPerBlock := 1 << levelBits[l]
 		logMaxPages := levelLogPages[l]
@@ -657,14 +657,14 @@ nextLevel:
 		i <<= levelBits[l]
 
 		// Slice out the block of entries we care about.
-		entries := s.summary[l][i : i+entriesPerBlock]
+		entries := p.summary[l][i : i+entriesPerBlock]
 
 		// Determine j0, the first index we should start iterating from.
 		// The searchAddr may help us eliminate iterations if we followed the
 		// searchAddr on the previous level or we're on the root leve, in which
 		// case the searchAddr should be the same as i after levelShift.
 		j0 := 0
-		if searchIdx := offAddrToLevelIndex(l, s.searchAddr); searchIdx&^(entriesPerBlock-1) == i {
+		if searchIdx := offAddrToLevelIndex(l, p.searchAddr); searchIdx&^(entriesPerBlock-1) == i {
 			j0 = searchIdx & (entriesPerBlock - 1)
 		}
 
@@ -729,7 +729,7 @@ nextLevel:
 			// We found a sufficiently large run of free pages straddling
 			// some boundary, so compute the address and return it.
 			addr := levelIndexToOffAddr(l, i).add(uintptr(base) * pageSize).addr()
-			return addr, s.findMappedAddr(firstFree.base)
+			return addr, p.findMappedAddr(firstFree.base)
 		}
 		if l == 0 {
 			// We're at level zero, so that means we've exhausted our search.
@@ -741,7 +741,7 @@ nextLevel:
 		// lied to us. In either case, dump some useful state and throw.
 		print("runtime: summary[", l-1, "][", lastSumIdx, "] = ", lastSum.start(), ", ", lastSum.max(), ", ", lastSum.end(), "\n")
 		print("runtime: level = ", l, ", npages = ", npages, ", j0 = ", j0, "\n")
-		print("runtime: s.searchAddr = ", hex(s.searchAddr.addr()), ", i = ", i, "\n")
+		print("runtime: p.searchAddr = ", hex(p.searchAddr.addr()), ", i = ", i, "\n")
 		print("runtime: levelShift[level] = ", levelShift[l], ", levelBits[level] = ", levelBits[l], "\n")
 		for j := 0; j < len(entries); j++ {
 			sum := entries[j]
@@ -758,12 +758,12 @@ nextLevel:
 	// After iterating over all levels, i must contain a chunk index which
 	// is what the final level represents.
 	ci := chunkIdx(i)
-	j, searchIdx := s.chunkOf(ci).find(npages, 0)
+	j, searchIdx := p.chunkOf(ci).find(npages, 0)
 	if j == ^uint(0) {
 		// We couldn't find any space in this chunk despite the summaries telling
 		// us it should be there. There's likely a bug, so dump some state and throw.
-		sum := s.summary[len(s.summary)-1][i]
-		print("runtime: summary[", len(s.summary)-1, "][", i, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
+		sum := p.summary[len(p.summary)-1][i]
+		print("runtime: summary[", len(p.summary)-1, "][", i, "] = (", sum.start(), ", ", sum.max(), ", ", sum.end(), ")\n")
 		print("runtime: npages = ", npages, "\n")
 		throw("bad summary data")
 	}
@@ -775,7 +775,7 @@ nextLevel:
 	// found an even narrower free window.
 	searchAddr := chunkBase(ci) + uintptr(searchIdx)*pageSize
 	foundFree(offAddr{searchAddr}, chunkBase(ci+1)-searchAddr)
-	return addr, s.findMappedAddr(firstFree.base)
+	return addr, p.findMappedAddr(firstFree.base)
 }
 
 // alloc allocates npages worth of memory from the page heap, returning the base
@@ -785,25 +785,25 @@ nextLevel:
 // Returns a 0 base address on failure, in which case other returned values
 // should be ignored.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
+// p.mheapLock must be held.
+func (p *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
 	// If the searchAddr refers to a region which has a higher address than
 	// any known chunk, then we know we're out of memory.
-	if chunkIndex(s.searchAddr.addr()) >= s.end {
+	if chunkIndex(p.searchAddr.addr()) >= p.end {
 		return 0, 0
 	}
 
 	// If npages has a chance of fitting in the chunk where the searchAddr is,
 	// search it directly.
 	searchAddr := minOffAddr
-	if pallocChunkPages-chunkPageIndex(s.searchAddr.addr()) >= uint(npages) {
+	if pallocChunkPages-chunkPageIndex(p.searchAddr.addr()) >= uint(npages) {
 		// npages is guaranteed to be no greater than pallocChunkPages here.
-		i := chunkIndex(s.searchAddr.addr())
-		if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) {
-			j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr.addr()))
+		i := chunkIndex(p.searchAddr.addr())
+		if max := p.summary[len(p.summary)-1][i].max(); max >= uint(npages) {
+			j, searchIdx := p.chunkOf(i).find(npages, chunkPageIndex(p.searchAddr.addr()))
 			if j == ^uint(0) {
 				print("runtime: max = ", max, ", npages = ", npages, "\n")
-				print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr.addr()), ", s.searchAddr = ", hex(s.searchAddr.addr()), "\n")
+				print("runtime: searchIdx = ", chunkPageIndex(p.searchAddr.addr()), ", p.searchAddr = ", hex(p.searchAddr.addr()), "\n")
 				throw("bad summary data")
 			}
 			addr = chunkBase(i) + uintptr(j)*pageSize
@@ -813,7 +813,7 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
 	}
 	// We failed to use a searchAddr for one reason or another, so try
 	// the slow path.
-	addr, searchAddr = s.find(npages)
+	addr, searchAddr = p.find(npages)
 	if addr == 0 {
 		if npages == 1 {
 			// We failed to find a single free page, the smallest unit
@@ -821,41 +821,41 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
 			// exhausted. Otherwise, the heap still might have free
 			// space in it, just not enough contiguous space to
 			// accommodate npages.
-			s.searchAddr = maxSearchAddr
+			p.searchAddr = maxSearchAddr
 		}
 		return 0, 0
 	}
 Found:
 	// Go ahead and actually mark the bits now that we have an address.
-	scav = s.allocRange(addr, npages)
+	scav = p.allocRange(addr, npages)
 
 	// If we found a higher searchAddr, we know that all the
 	// heap memory before that searchAddr in an offset address space is
-	// allocated, so bump s.searchAddr up to the new one.
-	if s.searchAddr.lessThan(searchAddr) {
-		s.searchAddr = searchAddr
+	// allocated, so bump p.searchAddr up to the new one.
+	if p.searchAddr.lessThan(searchAddr) {
+		p.searchAddr = searchAddr
 	}
 	return addr, scav
 }
 
 // free returns npages worth of memory starting at base back to the page heap.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) free(base, npages uintptr) {
-	// If we're freeing pages below the s.searchAddr, update searchAddr.
-	if b := (offAddr{base}); b.lessThan(s.searchAddr) {
-		s.searchAddr = b
+// p.mheapLock must be held.
+func (p *pageAlloc) free(base, npages uintptr) {
+	// If we're freeing pages below the p.searchAddr, update searchAddr.
+	if b := (offAddr{base}); b.lessThan(p.searchAddr) {
+		p.searchAddr = b
 	}
 	// Update the free high watermark for the scavenger.
 	limit := base + npages*pageSize - 1
-	if offLimit := (offAddr{limit}); s.scav.freeHWM.lessThan(offLimit) {
-		s.scav.freeHWM = offLimit
+	if offLimit := (offAddr{limit}); p.scav.freeHWM.lessThan(offLimit) {
+		p.scav.freeHWM = offLimit
 	}
 	if npages == 1 {
 		// Fast path: we're clearing a single bit, and we know exactly
 		// where it is, so mark it directly.
 		i := chunkIndex(base)
-		s.chunkOf(i).free1(chunkPageIndex(base))
+		p.chunkOf(i).free1(chunkPageIndex(base))
 	} else {
 		// Slow path: we're clearing more bits so we may need to iterate.
 		sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -863,17 +863,17 @@ func (s *pageAlloc) free(base, npages uintptr) {
 
 		if sc == ec {
 			// The range doesn't cross any chunk boundaries.
-			s.chunkOf(sc).free(si, ei+1-si)
+			p.chunkOf(sc).free(si, ei+1-si)
 		} else {
 			// The range crosses at least one chunk boundary.
-			s.chunkOf(sc).free(si, pallocChunkPages-si)
+			p.chunkOf(sc).free(si, pallocChunkPages-si)
 			for c := sc + 1; c < ec; c++ {
-				s.chunkOf(c).freeAll()
+				p.chunkOf(c).freeAll()
 			}
-			s.chunkOf(ec).free(0, ei+1)
+			p.chunkOf(ec).free(0, ei+1)
 		}
 	}
-	s.update(base, npages, true, false)
+	p.update(base, npages, true, false)
 }
 
 const (
diff --git a/src/runtime/mpagealloc_32bit.go b/src/runtime/mpagealloc_32bit.go
index 90f1e54d6c..331dadade9 100644
--- a/src/runtime/mpagealloc_32bit.go
+++ b/src/runtime/mpagealloc_32bit.go
@@ -60,7 +60,7 @@ var levelLogPages = [summaryLevels]uint{
 }
 
 // See mpagealloc_64bit.go for details.
-func (s *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit() {
 	// Calculate how much memory all our entries will take up.
 	//
 	// This should be around 12 KiB or less.
@@ -76,7 +76,7 @@ func (s *pageAlloc) sysInit() {
 		throw("failed to reserve page summary memory")
 	}
 	// There isn't much. Just map it and mark it as used immediately.
-	sysMap(reservation, totalSize, s.sysStat)
+	sysMap(reservation, totalSize, p.sysStat)
 	sysUsed(reservation, totalSize)
 
 	// Iterate over the reservation and cut it up into slices.
@@ -88,29 +88,29 @@ func (s *pageAlloc) sysInit() {
 
 		// Put this reservation into a slice.
 		sl := notInHeapSlice{(*notInHeap)(reservation), 0, entries}
-		s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+		p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
 
 		reservation = add(reservation, uintptr(entries)*pallocSumBytes)
 	}
 }
 
 // See mpagealloc_64bit.go for details.
-func (s *pageAlloc) sysGrow(base, limit uintptr) {
+func (p *pageAlloc) sysGrow(base, limit uintptr) {
 	if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
 		print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
 		throw("sysGrow bounds not aligned to pallocChunkBytes")
 	}
 
 	// Walk up the tree and update the summary slices.
-	for l := len(s.summary) - 1; l >= 0; l-- {
+	for l := len(p.summary) - 1; l >= 0; l-- {
 		// Figure out what part of the summary array this new address space needs.
 		// Note that we need to align the ranges to the block width (1<<levelBits[l])
 		// at this level because the full block is needed to compute the summary for
 		// the next level.
 		lo, hi := addrsToSummaryRange(l, base, limit)
 		_, hi = blockAlignSummaryRange(l, lo, hi)
-		if hi > len(s.summary[l]) {
-			s.summary[l] = s.summary[l][:hi]
+		if hi > len(p.summary[l]) {
+			p.summary[l] = p.summary[l][:hi]
 		}
 	}
 }
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go
index a1691ba802..ffacb46c18 100644
--- a/src/runtime/mpagealloc_64bit.go
+++ b/src/runtime/mpagealloc_64bit.go
@@ -67,7 +67,7 @@ var levelLogPages = [summaryLevels]uint{
 // sysInit performs architecture-dependent initialization of fields
 // in pageAlloc. pageAlloc should be uninitialized except for sysStat
 // if any runtime statistic should be updated.
-func (s *pageAlloc) sysInit() {
+func (p *pageAlloc) sysInit() {
 	// Reserve memory for each level. This will get mapped in
 	// as R/W by setArenas.
 	for l, shift := range levelShift {
@@ -82,21 +82,21 @@ func (s *pageAlloc) sysInit() {
 
 		// Put this reservation into a slice.
 		sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
-		s.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
+		p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
 	}
 }
 
 // sysGrow performs architecture-dependent operations on heap
 // growth for the page allocator, such as mapping in new memory
 // for summaries. It also updates the length of the slices in
-// s.summary.
+// [.summary.
 //
 // base is the base of the newly-added heap memory and limit is
 // the first address past the end of the newly-added heap memory.
 // Both must be aligned to pallocChunkBytes.
 //
-// The caller must update s.start and s.end after calling sysGrow.
-func (s *pageAlloc) sysGrow(base, limit uintptr) {
+// The caller must update p.start and p.end after calling sysGrow.
+func (p *pageAlloc) sysGrow(base, limit uintptr) {
 	if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
 		print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
 		throw("sysGrow bounds not aligned to pallocChunkBytes")
@@ -111,12 +111,12 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
 	}
 
 	// summaryRangeToSumAddrRange converts a range of indices in any
-	// level of s.summary into page-aligned addresses which cover that
+	// level of p.summary into page-aligned addresses which cover that
 	// range of indices.
 	summaryRangeToSumAddrRange := func(level, sumIdxBase, sumIdxLimit int) addrRange {
 		baseOffset := alignDown(uintptr(sumIdxBase)*pallocSumBytes, physPageSize)
 		limitOffset := alignUp(uintptr(sumIdxLimit)*pallocSumBytes, physPageSize)
-		base := unsafe.Pointer(&s.summary[level][0])
+		base := unsafe.Pointer(&p.summary[level][0])
 		return addrRange{
 			offAddr{uintptr(add(base, baseOffset))},
 			offAddr{uintptr(add(base, limitOffset))},
@@ -140,10 +140,10 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
 	//
 	// This will be used to look at what memory in the summary array is already
 	// mapped before and after this new range.
-	inUseIndex := s.inUse.findSucc(base)
+	inUseIndex := p.inUse.findSucc(base)
 
 	// Walk up the radix tree and map summaries in as needed.
-	for l := range s.summary {
+	for l := range p.summary {
 		// Figure out what part of the summary array this new address space needs.
 		needIdxBase, needIdxLimit := addrRangeToSummaryRange(l, makeAddrRange(base, limit))
 
@@ -151,8 +151,8 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
 		// we get tight bounds checks on at least the top bound.
 		//
 		// We must do this regardless of whether we map new memory.
-		if needIdxLimit > len(s.summary[l]) {
-			s.summary[l] = s.summary[l][:needIdxLimit]
+		if needIdxLimit > len(p.summary[l]) {
+			p.summary[l] = p.summary[l][:needIdxLimit]
 		}
 
 		// Compute the needed address range in the summary array for level l.
@@ -163,10 +163,10 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
 		// for mapping. prune's invariants are guaranteed by the fact that this
 		// function will never be asked to remap the same memory twice.
 		if inUseIndex > 0 {
-			need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex-1]))
+			need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex-1]))
 		}
-		if inUseIndex < len(s.inUse.ranges) {
-			need = need.subtract(addrRangeToSumAddrRange(l, s.inUse.ranges[inUseIndex]))
+		if inUseIndex < len(p.inUse.ranges) {
+			need = need.subtract(addrRangeToSumAddrRange(l, p.inUse.ranges[inUseIndex]))
 		}
 		// It's possible that after our pruning above, there's nothing new to map.
 		if need.size() == 0 {
@@ -174,7 +174,7 @@ func (s *pageAlloc) sysGrow(base, limit uintptr) {
 		}
 
 		// Map and commit need.
-		sysMap(unsafe.Pointer(need.base.addr()), need.size(), s.sysStat)
+		sysMap(unsafe.Pointer(need.base.addr()), need.size(), p.sysStat)
 		sysUsed(unsafe.Pointer(need.base.addr()), need.size())
 	}
 }
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go
index 683a997136..5f76501a1c 100644
--- a/src/runtime/mpagecache.go
+++ b/src/runtime/mpagecache.go
@@ -71,8 +71,8 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) {
 // into s. Then, it clears the cache, such that empty returns
 // true.
 //
-// s.mheapLock must be held or the world must be stopped.
-func (c *pageCache) flush(s *pageAlloc) {
+// p.mheapLock must be held or the world must be stopped.
+func (c *pageCache) flush(p *pageAlloc) {
 	if c.empty() {
 		return
 	}
@@ -83,18 +83,18 @@ func (c *pageCache) flush(s *pageAlloc) {
 	// slower, safer thing by iterating over each bit individually.
 	for i := uint(0); i < 64; i++ {
 		if c.cache&(1<<i) != 0 {
-			s.chunkOf(ci).free1(pi + i)
+			p.chunkOf(ci).free1(pi + i)
 		}
 		if c.scav&(1<<i) != 0 {
-			s.chunkOf(ci).scavenged.setRange(pi+i, 1)
+			p.chunkOf(ci).scavenged.setRange(pi+i, 1)
 		}
 	}
 	// Since this is a lot like a free, we need to make sure
 	// we update the searchAddr just like free does.
-	if b := (offAddr{c.base}); b.lessThan(s.searchAddr) {
-		s.searchAddr = b
+	if b := (offAddr{c.base}); b.lessThan(p.searchAddr) {
+		p.searchAddr = b
 	}
-	s.update(c.base, pageCachePages, false, false)
+	p.update(c.base, pageCachePages, false, false)
 	*c = pageCache{}
 }
 
@@ -102,19 +102,19 @@ func (c *pageCache) flush(s *pageAlloc) {
 // may not be contiguous, and returns a pageCache structure which owns the
 // chunk.
 //
-// s.mheapLock must be held.
-func (s *pageAlloc) allocToCache() pageCache {
+// p.mheapLock must be held.
+func (p *pageAlloc) allocToCache() pageCache {
 	// If the searchAddr refers to a region which has a higher address than
 	// any known chunk, then we know we're out of memory.
-	if chunkIndex(s.searchAddr.addr()) >= s.end {
+	if chunkIndex(p.searchAddr.addr()) >= p.end {
 		return pageCache{}
 	}
 	c := pageCache{}
-	ci := chunkIndex(s.searchAddr.addr()) // chunk index
-	if s.summary[len(s.summary)-1][ci] != 0 {
+	ci := chunkIndex(p.searchAddr.addr()) // chunk index
+	if p.summary[len(p.summary)-1][ci] != 0 {
 		// Fast path: there's free pages at or near the searchAddr address.
-		chunk := s.chunkOf(ci)
-		j, _ := chunk.find(1, chunkPageIndex(s.searchAddr.addr()))
+		chunk := p.chunkOf(ci)
+		j, _ := chunk.find(1, chunkPageIndex(p.searchAddr.addr()))
 		if j == ^uint(0) {
 			throw("bad summary data")
 		}
@@ -126,15 +126,15 @@ func (s *pageAlloc) allocToCache() pageCache {
 	} else {
 		// Slow path: the searchAddr address had nothing there, so go find
 		// the first free page the slow way.
-		addr, _ := s.find(1)
+		addr, _ := p.find(1)
 		if addr == 0 {
 			// We failed to find adequate free space, so mark the searchAddr as OoM
 			// and return an empty pageCache.
-			s.searchAddr = maxSearchAddr
+			p.searchAddr = maxSearchAddr
 			return pageCache{}
 		}
 		ci := chunkIndex(addr)
-		chunk := s.chunkOf(ci)
+		chunk := p.chunkOf(ci)
 		c = pageCache{
 			base:  alignDown(addr, 64*pageSize),
 			cache: ^chunk.pages64(chunkPageIndex(addr)),
@@ -143,19 +143,19 @@ func (s *pageAlloc) allocToCache() pageCache {
 	}
 
 	// Set the bits as allocated and clear the scavenged bits.
-	s.allocRange(c.base, pageCachePages)
+	p.allocRange(c.base, pageCachePages)
 
 	// Update as an allocation, but note that it's not contiguous.
-	s.update(c.base, pageCachePages, false, true)
+	p.update(c.base, pageCachePages, false, true)
 
 	// Set the search address to the last page represented by the cache.
 	// Since all of the pages in this block are going to the cache, and we
 	// searched for the first free page, we can confidently start at the
 	// next page.
 	//
-	// However, s.searchAddr is not allowed to point into unmapped heap memory
+	// However, p.searchAddr is not allowed to point into unmapped heap memory
 	// unless it is maxSearchAddr, so make it the last page as opposed to
 	// the page after.
-	s.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)}
+	p.searchAddr = offAddr{c.base + pageSize*(pageCachePages-1)}
 	return c
 }
-- 
cgit v1.3


From 8ebc58452af3a586a3da1f68725bc83c78d4b073 Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 29 Jul 2020 20:25:05 +0000
Subject: runtime: delineate which memstats are system stats with a type

This change modifies the type of several mstats fields to be a new type:
sysMemStat. This type has the same structure as the fields used to have.

The purpose of this change is to make it very clear which stats may be
used in various functions for accounting (usually the platform-specific
sys* functions, but there are others). Currently there's an implicit
understanding that the *uint64 value passed to these functions is some
kind of statistic whose value is atomically managed. This understanding
isn't inherently problematic, but we're about to change how some stats
(which currently use mSysStatInc and mSysStatDec) work, so we want to
make it very clear what the various requirements are around "sysStat".

This change also removes mSysStatInc and mSysStatDec in favor of a
method on sysMemStat. Note that those two functions were originally
written the way they were because atomic 64-bit adds required a valid G
on ARM, but this hasn't been the case for a very long time (since
golang.org/cl/14204, but even before then it wasn't clear if mutexes
required a valid G anymore). Today we implement 64-bit adds on ARM with
a spinlock table.

Change-Id: I4e9b37cf14afc2ae20cf736e874eb0064af086d7
Reviewed-on: https://go-review.googlesource.com/c/go/+/246971
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 src/runtime/export_test.go |  4 +--
 src/runtime/heapdump.go    | 14 ++++----
 src/runtime/malloc.go      | 10 +++---
 src/runtime/mem_aix.go     | 12 +++----
 src/runtime/mem_bsd.go     | 12 +++----
 src/runtime/mem_darwin.go  | 12 +++----
 src/runtime/mem_js.go      | 10 +++---
 src/runtime/mem_linux.go   | 12 +++----
 src/runtime/mem_plan9.go   | 12 +++----
 src/runtime/mem_windows.go | 12 +++----
 src/runtime/mfixalloc.go   |  4 +--
 src/runtime/mgcscavenge.go |  4 +--
 src/runtime/mheap.go       | 28 ++++++++--------
 src/runtime/mpagealloc.go  |  4 +--
 src/runtime/mranges.go     |  4 +--
 src/runtime/mstats.go      | 82 +++++++++++++++++-----------------------------
 src/runtime/os_darwin.go   |  3 +-
 17 files changed, 109 insertions(+), 130 deletions(-)

(limited to 'src/runtime/mpagealloc.go')

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 47cbc286f6..cb753ee819 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -820,7 +820,7 @@ type AddrRanges struct {
 // Add.
 func NewAddrRanges() AddrRanges {
 	r := addrRanges{}
-	r.init(new(uint64))
+	r.init(new(sysMemStat))
 	return AddrRanges{r, true}
 }
 
@@ -844,7 +844,7 @@ func MakeAddrRanges(a ...AddrRange) AddrRanges {
 	return AddrRanges{addrRanges{
 		ranges:     ranges,
 		totalBytes: total,
-		sysStat:    new(uint64),
+		sysStat:    new(sysMemStat),
 	}, false}
 }
 
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index 4c35309211..495ecc5164 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -548,20 +548,20 @@ func dumpmemstats() {
 	dumpint(memstats.nmalloc)
 	dumpint(memstats.nfree)
 	dumpint(memstats.heap_alloc)
-	dumpint(memstats.heap_sys)
+	dumpint(memstats.heap_sys.load())
 	dumpint(memstats.heap_idle)
 	dumpint(memstats.heap_inuse)
 	dumpint(memstats.heap_released)
 	dumpint(memstats.heap_objects)
 	dumpint(memstats.stacks_inuse)
-	dumpint(memstats.stacks_sys)
+	dumpint(memstats.stacks_sys.load())
 	dumpint(memstats.mspan_inuse)
-	dumpint(memstats.mspan_sys)
+	dumpint(memstats.mspan_sys.load())
 	dumpint(memstats.mcache_inuse)
-	dumpint(memstats.mcache_sys)
-	dumpint(memstats.buckhash_sys)
-	dumpint(memstats.gc_sys)
-	dumpint(memstats.other_sys)
+	dumpint(memstats.mcache_sys.load())
+	dumpint(memstats.buckhash_sys.load())
+	dumpint(memstats.gc_sys.load())
+	dumpint(memstats.other_sys.load())
 	dumpint(memstats.next_gc)
 	dumpint(memstats.last_gc_unix)
 	dumpint(memstats.pause_total_ns)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 0f48d7f68e..27d678d917 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1313,7 +1313,7 @@ var persistentChunks *notInHeap
 // The returned memory will be zeroed.
 //
 // Consider marking persistentalloc'd types go:notinheap.
-func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	var p *notInHeap
 	systemstack(func() {
 		p = persistentalloc1(size, align, sysStat)
@@ -1324,7 +1324,7 @@ func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
 // Must run on system stack because stack growth can (re)invoke it.
 // See issue 9174.
 //go:systemstack
-func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
+func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap {
 	const (
 		maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
 	)
@@ -1383,8 +1383,8 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
 	}
 
 	if sysStat != &memstats.other_sys {
-		mSysStatInc(sysStat, size)
-		mSysStatDec(&memstats.other_sys, size)
+		sysStat.add(int64(size))
+		memstats.other_sys.add(-int64(size))
 	}
 	return p
 }
@@ -1425,7 +1425,7 @@ func (l *linearAlloc) init(base, size uintptr) {
 	l.end = base + size
 }
 
-func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+func (l *linearAlloc) alloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	p := alignUp(l.next, align)
 	if p+size > l.end {
 		return nil
diff --git a/src/runtime/mem_aix.go b/src/runtime/mem_aix.go
index 7e145b072a..957aa4dcc2 100644
--- a/src/runtime/mem_aix.go
+++ b/src/runtime/mem_aix.go
@@ -11,7 +11,7 @@ import (
 // Don't split the stack as this method may be invoked without a valid G, which
 // prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
 	if err != 0 {
 		if err == _EACCES {
@@ -24,7 +24,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
 		}
 		return nil
 	}
-	mSysStatInc(sysStat, n)
+	sysStat.add(int64(n))
 	return p
 }
 
@@ -41,8 +41,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	munmap(v, n)
 
 }
@@ -59,8 +59,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
 	return p
 }
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 
 	// AIX does not allow mapping a range that is already mapped.
 	// So, call mprotect to change permissions.
diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go
index 4d860e7bd3..bc672019fb 100644
--- a/src/runtime/mem_bsd.go
+++ b/src/runtime/mem_bsd.go
@@ -13,12 +13,12 @@ import (
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
 	if err != 0 {
 		return nil
 	}
-	mSysStatInc(sysStat, n)
+	sysStat.add(int64(n))
 	return v
 }
 
@@ -35,8 +35,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	munmap(v, n)
 }
 
@@ -65,8 +65,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
 const _sunosEAGAIN = 11
 const _ENOMEM = 12
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 
 	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
 	if err == _ENOMEM || ((GOOS == "solaris" || GOOS == "illumos") && err == _sunosEAGAIN) {
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
index 3b5d565b0f..7fccd2bb8e 100644
--- a/src/runtime/mem_darwin.go
+++ b/src/runtime/mem_darwin.go
@@ -11,12 +11,12 @@ import (
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	v, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
 	if err != 0 {
 		return nil
 	}
-	mSysStatInc(sysStat, n)
+	sysStat.add(int64(n))
 	return v
 }
 
@@ -39,8 +39,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	munmap(v, n)
 }
 
@@ -58,8 +58,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
 
 const _ENOMEM = 12
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 
 	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
 	if err == _ENOMEM {
diff --git a/src/runtime/mem_js.go b/src/runtime/mem_js.go
index 092b3d4fa2..957ed36ffa 100644
--- a/src/runtime/mem_js.go
+++ b/src/runtime/mem_js.go
@@ -13,7 +13,7 @@ import (
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	p := sysReserve(nil, n)
 	sysMap(p, n, sysStat)
 	return p
@@ -31,8 +31,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 }
 
 func sysFault(v unsafe.Pointer, n uintptr) {
@@ -80,6 +80,6 @@ func growMemory(pages int32) int32
 // This allows the front-end to replace the old DataView object with a new one.
 func resetMemoryDataView()
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 }
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index 59b0bca970..3436851091 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -17,7 +17,7 @@ const (
 // Don't split the stack as this method may be invoked without a valid G, which
 // prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
 	if err != 0 {
 		if err == _EACCES {
@@ -30,7 +30,7 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
 		}
 		return nil
 	}
-	mSysStatInc(sysStat, n)
+	sysStat.add(int64(n))
 	return p
 }
 
@@ -144,8 +144,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	munmap(v, n)
 }
 
@@ -161,8 +161,8 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
 	return p
 }
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 
 	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
 	if err == _ENOMEM {
diff --git a/src/runtime/mem_plan9.go b/src/runtime/mem_plan9.go
index 4fea851cdd..53d8e6dffa 100644
--- a/src/runtime/mem_plan9.go
+++ b/src/runtime/mem_plan9.go
@@ -140,19 +140,19 @@ func sbrk(n uintptr) unsafe.Pointer {
 	return unsafe.Pointer(bl)
 }
 
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
 	lock(&memlock)
 	p := memAlloc(n)
 	memCheck()
 	unlock(&memlock)
 	if p != nil {
-		mSysStatInc(sysStat, n)
+		sysStat.add(int64(n))
 	}
 	return p
 }
 
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	lock(&memlock)
 	if uintptr(v)+n == bloc {
 		// Address range being freed is at the end of memory,
@@ -176,10 +176,10 @@ func sysUsed(v unsafe.Pointer, n uintptr) {
 func sysHugePage(v unsafe.Pointer, n uintptr) {
 }
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
 	// sysReserve has already allocated all heap memory,
 	// but has not adjusted stats.
-	mSysStatInc(sysStat, n)
+	sysStat.add(int64(n))
 }
 
 func sysFault(v unsafe.Pointer, n uintptr) {
diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go
index 165062ec27..3a805b9767 100644
--- a/src/runtime/mem_windows.go
+++ b/src/runtime/mem_windows.go
@@ -24,8 +24,8 @@ const (
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
-	mSysStatInc(sysStat, n)
+func sysAlloc(n uintptr, sysStat *sysMemStat) unsafe.Pointer {
+	sysStat.add(int64(n))
 	return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_COMMIT|_MEM_RESERVE, _PAGE_READWRITE))
 }
 
@@ -97,8 +97,8 @@ func sysHugePage(v unsafe.Pointer, n uintptr) {
 // Don't split the stack as this function may be invoked without a valid G,
 // which prevents us from allocating more stack.
 //go:nosplit
-func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatDec(sysStat, n)
+func sysFree(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(-int64(n))
 	r := stdcall3(_VirtualFree, uintptr(v), 0, _MEM_RELEASE)
 	if r == 0 {
 		print("runtime: VirtualFree of ", n, " bytes failed with errno=", getlasterror(), "\n")
@@ -124,6 +124,6 @@ func sysReserve(v unsafe.Pointer, n uintptr) unsafe.Pointer {
 	return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_RESERVE, _PAGE_READWRITE))
 }
 
-func sysMap(v unsafe.Pointer, n uintptr, sysStat *uint64) {
-	mSysStatInc(sysStat, n)
+func sysMap(v unsafe.Pointer, n uintptr, sysStat *sysMemStat) {
+	sysStat.add(int64(n))
 }
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
index f9dd6ca474..293c16b38b 100644
--- a/src/runtime/mfixalloc.go
+++ b/src/runtime/mfixalloc.go
@@ -32,7 +32,7 @@ type fixalloc struct {
 	chunk  uintptr // use uintptr instead of unsafe.Pointer to avoid write barriers
 	nchunk uint32
 	inuse  uintptr // in-use bytes now
-	stat   *uint64
+	stat   *sysMemStat
 	zero   bool // zero allocations
 }
 
@@ -49,7 +49,7 @@ type mlink struct {
 
 // Initialize f to allocate objects of the given size,
 // using the allocator to obtain chunks of memory.
-func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
+func (f *fixalloc) init(size uintptr, first func(arg, p unsafe.Pointer), arg unsafe.Pointer, stat *sysMemStat) {
 	f.size = size
 	f.first = first
 	f.arg = arg
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index 6328b295ca..8b1a0be353 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -100,7 +100,7 @@ const (
 
 // heapRetained returns an estimate of the current heap RSS.
 func heapRetained() uint64 {
-	return atomic.Load64(&memstats.heap_sys) - atomic.Load64(&memstats.heap_released)
+	return memstats.heap_sys.load() - atomic.Load64(&memstats.heap_released)
 }
 
 // gcPaceScavenger updates the scavenger's pacing, particularly
@@ -711,7 +711,7 @@ func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr
 
 	// Update global accounting only when not in test, otherwise
 	// the runtime's accounting will be wrong.
-	mSysStatInc(&memstats.heap_released, uintptr(npages)*pageSize)
+	atomic.Xadd64(&memstats.heap_released, int64(npages)*pageSize)
 	return addr
 }
 
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index df659e222b..27c1bfbcf1 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -1222,22 +1222,22 @@ HaveSpan:
 		// sysUsed all the pages that are actually available
 		// in the span since some of them might be scavenged.
 		sysUsed(unsafe.Pointer(base), nbytes)
-		mSysStatDec(&memstats.heap_released, scav)
+		atomic.Xadd64(&memstats.heap_released, -int64(scav))
 	}
 	// Update stats.
 	switch typ {
 	case spanAllocHeap:
-		mSysStatInc(&memstats.heap_inuse, nbytes)
+		atomic.Xadd64(&memstats.heap_inuse, int64(nbytes))
 	case spanAllocStack:
-		mSysStatInc(&memstats.stacks_inuse, nbytes)
+		atomic.Xadd64(&memstats.stacks_inuse, int64(nbytes))
 	case spanAllocPtrScalarBits, spanAllocWorkBuf:
-		mSysStatInc(&memstats.gc_sys, nbytes)
+		memstats.gc_sys.add(int64(nbytes))
 	}
 	if typ.manual() {
 		// Manually managed memory doesn't count toward heap_sys.
-		mSysStatDec(&memstats.heap_sys, nbytes)
+		memstats.heap_sys.add(-int64(nbytes))
 	}
-	mSysStatDec(&memstats.heap_idle, nbytes)
+	atomic.Xadd64(&memstats.heap_idle, -int64(nbytes))
 
 	// Publish the span in various locations.
 
@@ -1314,8 +1314,8 @@ func (h *mheap) grow(npage uintptr) bool {
 		// The allocation is always aligned to the heap arena
 		// size which is always > physPageSize, so its safe to
 		// just add directly to heap_released.
-		mSysStatInc(&memstats.heap_released, asize)
-		mSysStatInc(&memstats.heap_idle, asize)
+		atomic.Xadd64(&memstats.heap_released, int64(asize))
+		atomic.Xadd64(&memstats.heap_idle, int64(asize))
 
 		// Recalculate nBase.
 		// We know this won't overflow, because sysAlloc returned
@@ -1400,18 +1400,20 @@ func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
 	// Update stats.
 	//
 	// Mirrors the code in allocSpan.
+	nbytes := s.npages * pageSize
 	switch typ {
 	case spanAllocHeap:
-		mSysStatDec(&memstats.heap_inuse, s.npages*pageSize)
+		atomic.Xadd64(&memstats.heap_inuse, -int64(nbytes))
 	case spanAllocStack:
-		mSysStatDec(&memstats.stacks_inuse, s.npages*pageSize)
+		atomic.Xadd64(&memstats.stacks_inuse, -int64(nbytes))
 	case spanAllocPtrScalarBits, spanAllocWorkBuf:
-		mSysStatDec(&memstats.gc_sys, s.npages*pageSize)
+		memstats.gc_sys.add(-int64(nbytes))
 	}
 	if typ.manual() {
-		mSysStatInc(&memstats.heap_sys, s.npages*pageSize)
+		// Manually managed memory doesn't count toward heap_sys, so add it back.
+		memstats.heap_sys.add(int64(nbytes))
 	}
-	mSysStatInc(&memstats.heap_idle, s.npages*pageSize)
+	atomic.Xadd64(&memstats.heap_idle, int64(nbytes))
 
 	// Mark the space as free.
 	h.pages.free(s.base(), s.npages)
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 560babed03..2af1c97e0b 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -293,13 +293,13 @@ type pageAlloc struct {
 
 	// sysStat is the runtime memstat to update when new system
 	// memory is committed by the pageAlloc for allocation metadata.
-	sysStat *uint64
+	sysStat *sysMemStat
 
 	// Whether or not this struct is being used in tests.
 	test bool
 }
 
-func (p *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
+func (p *pageAlloc) init(mheapLock *mutex, sysStat *sysMemStat) {
 	if levelLogPages[0] > logMaxPackedValue {
 		// We can't represent 1<<levelLogPages[0] pages, the maximum number
 		// of pages we need to represent at the root level, in a summary, which
diff --git a/src/runtime/mranges.go b/src/runtime/mranges.go
index 1109f506a6..16acadcff1 100644
--- a/src/runtime/mranges.go
+++ b/src/runtime/mranges.go
@@ -160,10 +160,10 @@ type addrRanges struct {
 	totalBytes uintptr
 
 	// sysStat is the stat to track allocations by this type
-	sysStat *uint64
+	sysStat *sysMemStat
 }
 
-func (a *addrRanges) init(sysStat *uint64) {
+func (a *addrRanges) init(sysStat *sysMemStat) {
 	ranges := (*notInHeapSlice)(unsafe.Pointer(&a.ranges))
 	ranges.len = 0
 	ranges.cap = 16
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 64687c24e5..571a9c9ce3 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -8,7 +8,6 @@ package runtime
 
 import (
 	"runtime/internal/atomic"
-	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -35,11 +34,11 @@ type mstats struct {
 	//
 	// Like MemStats, heap_sys and heap_inuse do not count memory
 	// in manually-managed spans.
-	heap_alloc    uint64 // bytes allocated and not yet freed (same as alloc above)
-	heap_sys      uint64 // virtual address space obtained from system for GC'd heap
-	heap_idle     uint64 // bytes in idle spans
-	heap_inuse    uint64 // bytes in mSpanInUse spans
-	heap_released uint64 // bytes released to the os
+	heap_alloc    uint64     // bytes allocated and not yet freed (same as alloc above)
+	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
+	heap_idle     uint64     // bytes in idle spans
+	heap_inuse    uint64     // bytes in mSpanInUse spans
+	heap_released uint64     // bytes released to the os
 
 	// heap_objects is not used by the runtime directly and instead
 	// computed on the fly by updatememstats.
@@ -47,15 +46,15 @@ type mstats struct {
 
 	// Statistics about allocation of low-level fixed-size structures.
 	// Protected by FixAlloc locks.
-	stacks_inuse uint64 // bytes in manually-managed stack spans; updated atomically or during STW
-	stacks_sys   uint64 // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
-	mspan_inuse  uint64 // mspan structures
-	mspan_sys    uint64
+	stacks_inuse uint64     // bytes in manually-managed stack spans; updated atomically or during STW
+	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys
+	mspan_inuse  uint64     // mspan structures
+	mspan_sys    sysMemStat
 	mcache_inuse uint64 // mcache structures
-	mcache_sys   uint64
-	buckhash_sys uint64 // profiling bucket hash table
-	gc_sys       uint64 // updated atomically or during STW
-	other_sys    uint64 // updated atomically or during STW
+	mcache_sys   sysMemStat
+	buckhash_sys sysMemStat // profiling bucket hash table
+	gc_sys       sysMemStat // updated atomically or during STW
+	other_sys    sysMemStat // updated atomically or during STW
 
 	// Statistics about the garbage collector.
 
@@ -533,8 +532,9 @@ func updatememstats() {
 
 	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
 	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
-	memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
-		memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
+	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
+		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gc_sys.load() +
+		memstats.other_sys.load()
 
 	// We also count stacks_inuse as sys memory.
 	memstats.sys += memstats.stacks_inuse
@@ -625,46 +625,24 @@ func flushallmcaches() {
 	}
 }
 
-// Atomically increases a given *system* memory stat. We are counting on this
-// stat never overflowing a uintptr, so this function must only be used for
-// system memory stats.
+// sysMemStat represents a global system statistic that is managed atomically.
 //
-// The current implementation for little endian architectures is based on
-// xadduintptr(), which is less than ideal: xadd64() should really be used.
-// Using xadduintptr() is a stop-gap solution until arm supports xadd64() that
-// doesn't use locks.  (Locks are a problem as they require a valid G, which
-// restricts their useability.)
-//
-// A side-effect of using xadduintptr() is that we need to check for
-// overflow errors.
-//go:nosplit
-func mSysStatInc(sysStat *uint64, n uintptr) {
-	if sysStat == nil {
-		return
-	}
-	if sys.BigEndian {
-		atomic.Xadd64(sysStat, int64(n))
-		return
-	}
-	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), n); val < n {
-		print("runtime: stat overflow: val ", val, ", n ", n, "\n")
-		exit(2)
-	}
+// This type must structurally be a uint64 so that mstats aligns with MemStats.
+type sysMemStat uint64
+
+// load atomically reads the value of the stat.
+func (s *sysMemStat) load() uint64 {
+	return atomic.Load64((*uint64)(s))
 }
 
-// Atomically decreases a given *system* memory stat. Same comments as
-// mSysStatInc apply.
-//go:nosplit
-func mSysStatDec(sysStat *uint64, n uintptr) {
-	if sysStat == nil {
-		return
-	}
-	if sys.BigEndian {
-		atomic.Xadd64(sysStat, -int64(n))
+// add atomically adds the sysMemStat by n.
+func (s *sysMemStat) add(n int64) {
+	if s == nil {
 		return
 	}
-	if val := atomic.Xadduintptr((*uintptr)(unsafe.Pointer(sysStat)), uintptr(-int64(n))); val+n < n {
-		print("runtime: stat underflow: val ", val, ", n ", n, "\n")
-		exit(2)
+	val := atomic.Xadd64((*uint64)(s), n)
+	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
+		print("runtime: val=", val, " n=", n, "\n")
+		throw("sysMemStat overflow")
 	}
 }
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
index 394bd6fb0f..3f5bb7cf96 100644
--- a/src/runtime/os_darwin.go
+++ b/src/runtime/os_darwin.go
@@ -198,7 +198,6 @@ func newosproc(mp *m) {
 		exit(1)
 	}
 	mp.g0.stack.hi = stacksize // for mstart
-	//mSysStatInc(&memstats.stacks_sys, stacksize) //TODO: do this?
 
 	// Tell the pthread library we won't join with this thread.
 	if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
@@ -247,7 +246,7 @@ func newosproc0(stacksize uintptr, fn uintptr) {
 		exit(1)
 	}
 	g0.stack.hi = stacksize // for mstart
-	mSysStatInc(&memstats.stacks_sys, stacksize)
+	memstats.stacks_sys.add(int64(stacksize))
 
 	// Tell the pthread library we won't join with this thread.
 	if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
-- 
cgit v1.3


From 9393b5bae5944acebed3ab6f995926b7de3ce429 Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Fri, 21 Aug 2020 11:59:55 -0400
Subject: runtime: add heap lock assertions

Some functions that required holding the heap lock _or_ world stop have
been simplified to simply requiring the heap lock. This is conceptually
simpler and taking the heap lock during world stop is guaranteed to not
contend. This was only done on functions already called on the
systemstack to avoid too many extra systemstack calls in GC.

Updates #40677

Change-Id: I15aa1dadcdd1a81aac3d2a9ecad6e7d0377befdc
Reviewed-on: https://go-review.googlesource.com/c/go/+/250262
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Trust: Michael Pratt <mpratt@google.com>
---
 src/runtime/export_test.go | 61 +++++++++++++++++++++++++++++++++++++++++-----
 src/runtime/malloc.go      |  2 ++
 src/runtime/mgc.go         |  4 +++
 src/runtime/mgcscavenge.go | 18 ++++++++++++++
 src/runtime/mheap.go       | 29 +++++++++++++++++-----
 src/runtime/mpagealloc.go  | 22 +++++++++++++++++
 src/runtime/mpagecache.go  | 14 ++++++++++-
 src/runtime/proc.go        |  2 ++
 8 files changed, 139 insertions(+), 13 deletions(-)

(limited to 'src/runtime/mpagealloc.go')

diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 4ca0420d2a..44551dcaf1 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -743,7 +743,16 @@ func (c *PageCache) Alloc(npages uintptr) (uintptr, uintptr) {
 	return (*pageCache)(c).alloc(npages)
 }
 func (c *PageCache) Flush(s *PageAlloc) {
-	(*pageCache)(c).flush((*pageAlloc)(s))
+	cp := (*pageCache)(c)
+	sp := (*pageAlloc)(s)
+
+	systemstack(func() {
+		// None of the tests need any higher-level locking, so we just
+		// take the lock internally.
+		lock(sp.mheapLock)
+		cp.flush(sp)
+		unlock(sp.mheapLock)
+	})
 }
 
 // Expose chunk index type.
@@ -754,13 +763,41 @@ type ChunkIdx chunkIdx
 type PageAlloc pageAlloc
 
 func (p *PageAlloc) Alloc(npages uintptr) (uintptr, uintptr) {
-	return (*pageAlloc)(p).alloc(npages)
+	pp := (*pageAlloc)(p)
+
+	var addr, scav uintptr
+	systemstack(func() {
+		// None of the tests need any higher-level locking, so we just
+		// take the lock internally.
+		lock(pp.mheapLock)
+		addr, scav = pp.alloc(npages)
+		unlock(pp.mheapLock)
+	})
+	return addr, scav
 }
 func (p *PageAlloc) AllocToCache() PageCache {
-	return PageCache((*pageAlloc)(p).allocToCache())
+	pp := (*pageAlloc)(p)
+
+	var c PageCache
+	systemstack(func() {
+		// None of the tests need any higher-level locking, so we just
+		// take the lock internally.
+		lock(pp.mheapLock)
+		c = PageCache(pp.allocToCache())
+		unlock(pp.mheapLock)
+	})
+	return c
 }
 func (p *PageAlloc) Free(base, npages uintptr) {
-	(*pageAlloc)(p).free(base, npages)
+	pp := (*pageAlloc)(p)
+
+	systemstack(func() {
+		// None of the tests need any higher-level locking, so we just
+		// take the lock internally.
+		lock(pp.mheapLock)
+		pp.free(base, npages)
+		unlock(pp.mheapLock)
+	})
 }
 func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
 	return ChunkIdx((*pageAlloc)(p).start), ChunkIdx((*pageAlloc)(p).end)
@@ -768,6 +805,8 @@ func (p *PageAlloc) Bounds() (ChunkIdx, ChunkIdx) {
 func (p *PageAlloc) Scavenge(nbytes uintptr, mayUnlock bool) (r uintptr) {
 	pp := (*pageAlloc)(p)
 	systemstack(func() {
+		// None of the tests need any higher-level locking, so we just
+		// take the lock internally.
 		lock(pp.mheapLock)
 		r = pp.scavenge(nbytes, mayUnlock)
 		unlock(pp.mheapLock)
@@ -926,7 +965,11 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
 		addr := chunkBase(chunkIdx(i))
 
 		// Mark the chunk's existence in the pageAlloc.
-		p.grow(addr, pallocChunkBytes)
+		systemstack(func() {
+			lock(p.mheapLock)
+			p.grow(addr, pallocChunkBytes)
+			unlock(p.mheapLock)
+		})
 
 		// Initialize the bitmap and update pageAlloc metadata.
 		chunk := p.chunkOf(chunkIndex(addr))
@@ -957,13 +1000,19 @@ func NewPageAlloc(chunks, scav map[ChunkIdx][]BitRange) *PageAlloc {
 		}
 
 		// Update heap metadata for the allocRange calls above.
-		p.update(addr, pallocChunkPages, false, false)
+		systemstack(func() {
+			lock(p.mheapLock)
+			p.update(addr, pallocChunkPages, false, false)
+			unlock(p.mheapLock)
+		})
 	}
+
 	systemstack(func() {
 		lock(p.mheapLock)
 		p.scavengeStartGen()
 		unlock(p.mheapLock)
 	})
+
 	return (*PageAlloc)(p)
 }
 
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 0563f49d17..4b798d129c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -627,6 +627,8 @@ func mallocinit() {
 //
 // h must be locked.
 func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+	assertLockHeld(&h.lock)
+
 	n = alignUp(n, heapArenaBytes)
 
 	// First, try the arena pre-reservation.
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index fb3c149942..185d3201ca 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -821,6 +821,8 @@ func pollFractionalWorkerExit() bool {
 //
 // mheap_.lock must be held or the world must be stopped.
 func gcSetTriggerRatio(triggerRatio float64) {
+	assertWorldStoppedOrLockHeld(&mheap_.lock)
+
 	// Compute the next GC goal, which is when the allocated heap
 	// has grown by GOGC/100 over the heap marked by the last
 	// cycle.
@@ -960,6 +962,8 @@ func gcSetTriggerRatio(triggerRatio float64) {
 //
 // mheap_.lock must be held or the world must be stopped.
 func gcEffectiveGrowthRatio() float64 {
+	assertWorldStoppedOrLockHeld(&mheap_.lock)
+
 	egogc := float64(atomic.Load64(&memstats.next_gc)-memstats.heap_marked) / float64(memstats.heap_marked)
 	if egogc < 0 {
 		// Shouldn't happen, but just in case.
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index 5843ada981..a242577bd9 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -397,6 +397,8 @@ func bgscavenge(c chan int) {
 //
 //go:systemstack
 func (p *pageAlloc) scavenge(nbytes uintptr, mayUnlock bool) uintptr {
+	assertLockHeld(p.mheapLock)
+
 	var (
 		addrs addrRange
 		gen   uint32
@@ -446,6 +448,8 @@ func printScavTrace(gen uint32, released uintptr, forced bool) {
 //
 //go:systemstack
 func (p *pageAlloc) scavengeStartGen() {
+	assertLockHeld(p.mheapLock)
+
 	if debug.scavtrace > 0 {
 		printScavTrace(p.scav.gen, p.scav.released, false)
 	}
@@ -495,6 +499,8 @@ func (p *pageAlloc) scavengeStartGen() {
 //
 //go:systemstack
 func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
+	assertLockHeld(p.mheapLock)
+
 	// Start by reserving the minimum.
 	r := p.scav.inUse.removeLast(p.scav.reservationBytes)
 
@@ -525,6 +531,8 @@ func (p *pageAlloc) scavengeReserve() (addrRange, uint32) {
 //
 //go:systemstack
 func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
+	assertLockHeld(p.mheapLock)
+
 	if r.size() == 0 || gen != p.scav.gen {
 		return
 	}
@@ -552,6 +560,8 @@ func (p *pageAlloc) scavengeUnreserve(r addrRange, gen uint32) {
 //
 //go:systemstack
 func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (uintptr, addrRange) {
+	assertLockHeld(p.mheapLock)
+
 	// Defensively check if we've recieved an empty address range.
 	// If so, just return.
 	if work.size() == 0 {
@@ -610,6 +620,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 		// If we found something, scavenge it and return!
 		if npages != 0 {
 			work.limit = offAddr{p.scavengeRangeLocked(maxChunk, base, npages)}
+
+			assertLockHeld(p.mheapLock) // Must be locked on return.
 			return uintptr(npages) * pageSize, work
 		}
 	}
@@ -674,12 +686,16 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 		base, npages := chunk.findScavengeCandidate(pallocChunkPages-1, minPages, maxPages)
 		if npages > 0 {
 			work.limit = offAddr{p.scavengeRangeLocked(candidateChunkIdx, base, npages)}
+
+			assertLockHeld(p.mheapLock) // Must be locked on return.
 			return uintptr(npages) * pageSize, work
 		}
 
 		// We were fooled, so let's continue from where we left off.
 		work.limit = offAddr{chunkBase(candidateChunkIdx)}
 	}
+
+	assertLockHeld(p.mheapLock) // Must be locked on return.
 	return 0, work
 }
 
@@ -692,6 +708,8 @@ func (p *pageAlloc) scavengeOne(work addrRange, max uintptr, mayUnlock bool) (ui
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr {
+	assertLockHeld(p.mheapLock)
+
 	p.chunkOf(ci).scavenged.setRange(base, npages)
 
 	// Compute the full address for the start of the range.
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 14a73c0491..66a59cb999 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -483,10 +483,15 @@ func (s *mspan) layout() (size, n, total uintptr) {
 // indirect call from the fixalloc initializer, the compiler can't see
 // this.
 //
+// The heap lock must be held.
+//
 //go:nowritebarrierrec
 func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
 	h := (*mheap)(vh)
 	s := (*mspan)(p)
+
+	assertLockHeld(&h.lock)
+
 	if len(h.allspans) >= cap(h.allspans) {
 		n := 64 * 1024 / sys.PtrSize
 		if n < cap(h.allspans)*3/2 {
@@ -721,7 +726,7 @@ func (h *mheap) init() {
 //
 // reclaim implements the page-reclaimer half of the sweeper.
 //
-// h must NOT be locked.
+// h.lock must NOT be held.
 func (h *mheap) reclaim(npage uintptr) {
 	// TODO(austin): Half of the time spent freeing spans is in
 	// locking/unlocking the heap (even with low contention). We
@@ -804,6 +809,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 	// In particular, if a span were freed and merged concurrently
 	// with this probing heapArena.spans, it would be possible to
 	// observe arbitrary, stale span pointers.
+	assertLockHeld(&h.lock)
+
 	n0 := n
 	var nFreed uintptr
 	sg := h.sweepgen
@@ -858,6 +865,8 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 		traceGCSweepSpan((n0 - nFreed) * pageSize)
 		lock(&h.lock)
 	}
+
+	assertLockHeld(&h.lock) // Must be locked on return.
 	return nFreed
 }
 
@@ -1011,7 +1020,7 @@ func (h *mheap) allocNeedsZero(base, npage uintptr) (needZero bool) {
 // tryAllocMSpan attempts to allocate an mspan object from
 // the P-local cache, but may fail.
 //
-// h need not be locked.
+// h.lock need not be held.
 //
 // This caller must ensure that its P won't change underneath
 // it during this function. Currently to ensure that we enforce
@@ -1035,7 +1044,7 @@ func (h *mheap) tryAllocMSpan() *mspan {
 
 // allocMSpanLocked allocates an mspan object.
 //
-// h must be locked.
+// h.lock must be held.
 //
 // allocMSpanLocked must be called on the system stack because
 // its caller holds the heap lock. See mheap for details.
@@ -1044,6 +1053,8 @@ func (h *mheap) tryAllocMSpan() *mspan {
 //
 //go:systemstack
 func (h *mheap) allocMSpanLocked() *mspan {
+	assertLockHeld(&h.lock)
+
 	pp := getg().m.p.ptr()
 	if pp == nil {
 		// We don't have a p so just do the normal thing.
@@ -1065,7 +1076,7 @@ func (h *mheap) allocMSpanLocked() *mspan {
 
 // freeMSpanLocked free an mspan object.
 //
-// h must be locked.
+// h.lock must be held.
 //
 // freeMSpanLocked must be called on the system stack because
 // its caller holds the heap lock. See mheap for details.
@@ -1074,6 +1085,8 @@ func (h *mheap) allocMSpanLocked() *mspan {
 //
 //go:systemstack
 func (h *mheap) freeMSpanLocked(s *mspan) {
+	assertLockHeld(&h.lock)
+
 	pp := getg().m.p.ptr()
 	// First try to free the mspan directly to the cache.
 	if pp != nil && pp.mspancache.len < len(pp.mspancache.buf) {
@@ -1097,7 +1110,7 @@ func (h *mheap) freeMSpanLocked(s *mspan) {
 //
 // The returned span is fully initialized.
 //
-// h must not be locked.
+// h.lock must not be held.
 //
 // allocSpan must be called on the system stack both because it acquires
 // the heap lock and because it must block GC transitions.
@@ -1281,8 +1294,10 @@ HaveSpan:
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
-// h must be locked.
+// h.lock must be held.
 func (h *mheap) grow(npage uintptr) bool {
+	assertLockHeld(&h.lock)
+
 	// We must grow the heap in whole palloc chunks.
 	ask := alignUp(npage, pallocChunkPages) * pageSize
 
@@ -1391,6 +1406,8 @@ func (h *mheap) freeManual(s *mspan, typ spanAllocType) {
 }
 
 func (h *mheap) freeSpanLocked(s *mspan, typ spanAllocType) {
+	assertLockHeld(&h.lock)
+
 	switch s.state.get() {
 	case mSpanManual:
 		if s.allocCount != 0 {
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 2af1c97e0b..dac1f39969 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -349,6 +349,8 @@ func (p *pageAlloc) chunkOf(ci chunkIdx) *pallocData {
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) grow(base, size uintptr) {
+	assertLockHeld(p.mheapLock)
+
 	// Round up to chunks, since we can't deal with increments smaller
 	// than chunks. Also, sysGrow expects aligned values.
 	limit := alignUp(base+size, pallocChunkBytes)
@@ -413,6 +415,8 @@ func (p *pageAlloc) grow(base, size uintptr) {
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
+	assertLockHeld(p.mheapLock)
+
 	// base, limit, start, and end are inclusive.
 	limit := base + npages*pageSize - 1
 	sc, ec := chunkIndex(base), chunkIndex(limit)
@@ -499,6 +503,8 @@ func (p *pageAlloc) update(base, npages uintptr, contig, alloc bool) {
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) allocRange(base, npages uintptr) uintptr {
+	assertLockHeld(p.mheapLock)
+
 	limit := base + npages*pageSize - 1
 	sc, ec := chunkIndex(base), chunkIndex(limit)
 	si, ei := chunkPageIndex(base), chunkPageIndex(limit)
@@ -534,6 +540,8 @@ func (p *pageAlloc) allocRange(base, npages uintptr) uintptr {
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr {
+	assertLockHeld(p.mheapLock)
+
 	// If we're not in a test, validate first by checking mheap_.arenas.
 	// This is a fast path which is only safe to use outside of testing.
 	ai := arenaIndex(addr.addr())
@@ -568,6 +576,8 @@ func (p *pageAlloc) findMappedAddr(addr offAddr) offAddr {
 //
 // p.mheapLock must be held.
 func (p *pageAlloc) find(npages uintptr) (uintptr, offAddr) {
+	assertLockHeld(p.mheapLock)
+
 	// Search algorithm.
 	//
 	// This algorithm walks each level l of the radix tree from the root level
@@ -786,7 +796,13 @@ nextLevel:
 // should be ignored.
 //
 // p.mheapLock must be held.
+//
+// Must run on the system stack because p.mheapLock must be held.
+//
+//go:systemstack
 func (p *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) {
+	assertLockHeld(p.mheapLock)
+
 	// If the searchAddr refers to a region which has a higher address than
 	// any known chunk, then we know we're out of memory.
 	if chunkIndex(p.searchAddr.addr()) >= p.end {
@@ -841,7 +857,13 @@ Found:
 // free returns npages worth of memory starting at base back to the page heap.
 //
 // p.mheapLock must be held.
+//
+// Must run on the system stack because p.mheapLock must be held.
+//
+//go:systemstack
 func (p *pageAlloc) free(base, npages uintptr) {
+	assertLockHeld(p.mheapLock)
+
 	// If we're freeing pages below the p.searchAddr, update searchAddr.
 	if b := (offAddr{base}); b.lessThan(p.searchAddr) {
 		p.searchAddr = b
diff --git a/src/runtime/mpagecache.go b/src/runtime/mpagecache.go
index 5f76501a1c..4b5c66d8d6 100644
--- a/src/runtime/mpagecache.go
+++ b/src/runtime/mpagecache.go
@@ -71,8 +71,14 @@ func (c *pageCache) allocN(npages uintptr) (uintptr, uintptr) {
 // into s. Then, it clears the cache, such that empty returns
 // true.
 //
-// p.mheapLock must be held or the world must be stopped.
+// p.mheapLock must be held.
+//
+// Must run on the system stack because p.mheapLock must be held.
+//
+//go:systemstack
 func (c *pageCache) flush(p *pageAlloc) {
+	assertLockHeld(p.mheapLock)
+
 	if c.empty() {
 		return
 	}
@@ -103,7 +109,13 @@ func (c *pageCache) flush(p *pageAlloc) {
 // chunk.
 //
 // p.mheapLock must be held.
+//
+// Must run on the system stack because p.mheapLock must be held.
+//
+//go:systemstack
 func (p *pageAlloc) allocToCache() pageCache {
+	assertLockHeld(p.mheapLock)
+
 	// If the searchAddr refers to a region which has a higher address than
 	// any known chunk, then we know we're out of memory.
 	if chunkIndex(p.searchAddr.addr()) >= p.end {
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 82284e6cd6..ced27ceb3a 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4603,7 +4603,9 @@ func (pp *p) destroy() {
 			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
 		}
 		pp.mspancache.len = 0
+		lock(&mheap_.lock)
 		pp.pcache.flush(&mheap_.pages)
+		unlock(&mheap_.lock)
 	})
 	freemcache(pp.mcache)
 	pp.mcache = nil
-- 
cgit v1.3