| author | Michael Anthony Knyszek <mknyszek@google.com> | 2022-04-10 20:34:17 +0000 |
|---|---|---|
| committer | Michael Knyszek <mknyszek@google.com> | 2022-05-03 15:13:53 +0000 |
| commit | 91f863013e6b5ba870f6bfbfda0b735cf54fb3ca (patch) | |
| tree | bbb4959f2ef79e59601cdf25fc882ea3ba426977 /src/runtime/mpagealloc_64bit.go | |
| parent | b4d81147d8dc26c8f7d6822b6249311d569af1de (diff) | |
| download | go-91f863013e6b5ba870f6bfbfda0b735cf54fb3ca.tar.xz | |
runtime: redesign scavenging algorithm
Currently the runtime's scavenging algorithm involves running from the
top of the heap address space to the bottom (or as far as it gets) once
per GC cycle. Once it treads some ground, it doesn't tread it again
until the next GC cycle.
This works just fine for the background scavenger, for heap-growth
scavenging, and for debug.FreeOSMemory. However, it breaks down in the
face of a memory limit for small heaps in the tens of MiB. Basically,
because the scavenger never retreads old ground, it's completely
oblivious to new memory it could scavenge -- memory it really *should*
scavenge in the face of a memory limit.
Also, every time some thread goes to scavenge in the runtime, it
reserves what could be a considerable amount of address space, hiding it
from other scavengers.
This change modifies and simplifies the implementation overall. It's
less code with complexities that are much better encapsulated. The
current implementation iterates optimistically over the address space
looking for memory to scavenge, keeping track of what it last saw. The
new implementation does the same, but instead of directly iterating over
pages, it iterates over chunks. It maintains an index of chunks (as a
bitmap over the address space) that indicates which chunks may contain
scavenge work. The page allocator populates this index, while scavengers
consume it and iterate over it optimistically.
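To make the index concrete, here is a minimal sketch of the idea. All names here (scavChunkIndex, mark, clear, find) are invented for illustration; the real index is the runtime's scavengeIndex, backed by reserved address space rather than an ordinary slice:

```go
// Package scavsketch is an illustrative model of the chunk index; it is
// not the runtime's implementation.
package scavsketch

import (
	"math/bits"
	"sync/atomic"
)

// scavChunkIndex holds one bit per palloc chunk, set when the chunk may
// contain scavenge work (free-and-unscavenged pages).
type scavChunkIndex struct {
	bits []atomic.Uint32
}

// mark publishes that chunk ci may contain scavenge work.
func (s *scavChunkIndex) mark(ci int) {
	w, bit := &s.bits[ci/32], uint32(1)<<(ci%32)
	for {
		old := w.Load()
		if old&bit != 0 || w.CompareAndSwap(old, old|bit) {
			return // already marked, or we marked it
		}
	}
}

// clear records that chunk ci has been checked and drained.
func (s *scavChunkIndex) clear(ci int) {
	w, bit := &s.bits[ci/32], uint32(1)<<(ci%32)
	for {
		old := w.Load()
		if old&bit == 0 || w.CompareAndSwap(old, old&^bit) {
			return
		}
	}
}

// find returns the highest marked chunk at or below max, or -1 if none.
// No lock is held: readers race benignly with mark/clear and simply
// re-check whatever chunk they land on.
func (s *scavChunkIndex) find(max int) int {
	for i := max / 32; i >= 0; i-- {
		w := s.bits[i].Load()
		if i == max/32 {
			w &= uint32(1)<<(uint(max%32)+1) - 1 // drop bits above max
		}
		if w != 0 {
			return i*32 + bits.Len32(w) - 1 // highest set bit
		}
	}
	return -1
}
```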
This has two key benefits:
1. Scavenging is much simpler: find a candidate chunk, and check it,
essentially just using the scavengeOne fast path. There's no need for
the complexity of iterating beyond one chunk, because the index is
lock-free and already maintains that information.
2. If pages are freed to the page allocator (always guaranteed to be
unscavenged), the page allocator immediately notifies all scavengers
of the new source of work, avoiding the hiding issues of the old
implementation.
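Continuing the sketch above (same invented names), benefit 2 is just the producer side calling mark and the consumer side looping over find; a freed page is visible to every scavenger as soon as its chunk's bit is set:

```go
// freePages models the page allocator side: pages freed into chunk ci
// are guaranteed unscavenged, so advertise the chunk immediately.
func freePages(idx *scavChunkIndex, ci int) {
	// ... return the pages to chunk ci's free bitmap ...
	idx.mark(ci)
}

// scavengeSome models one scavenger: find a candidate chunk, check it
// (the scavengeOne fast path), clear its bit, and continue downward.
func scavengeSome(idx *scavChunkIndex, maxChunk int) {
	for ci := idx.find(maxChunk); ci >= 0; ci = idx.find(ci) {
		// ... release chunk ci's free pages back to the OS ...
		idx.clear(ci)
	}
}
```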
One downside of the new implementation, however, is that it's
potentially more expensive to find pages to scavenge. In the past, if
a single page would become free high up in the address space, the
runtime's scavengers would ignore it. Now that they won't, one or more
scavengers may need to iterate across potentially the whole heap to
find the next source of work. For the background scavenger, this just
means a potentially less reactive scavenger -- overall it should still
use the same amount of CPU. It means higher overheads for memory-limit
scavenging, but there's no baseline for that to compare against yet.
In practice, this hopefully shouldn't be too bad, since the chunk index
is extremely compact. For a 48-bit address space, the index is only 8
MiB in size at worst, but even just one physical page in the index is
able to support up to 128 GiB heaps, provided they aren't terribly
sparse. On 32-bit platforms, the index is only 128 bytes in size.
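As a sanity check on those numbers, assuming the page allocator's 4 MiB chunk size and a 4 KiB physical page (both assumptions of this example), the arithmetic is one index bit per chunk:

```go
package main

import "fmt"

const (
	chunkBytes = 4 << 20 // pallocChunkBytes: 4 MiB per chunk (assumed)
	pageBytes  = 4 << 10 // physical page size (assumed)
)

func main() {
	// Index size = address space / chunk size / 8 bits per byte.
	fmt.Println((uint64(1) << 48) / chunkBytes / 8) // 8388608 bytes = 8 MiB
	fmt.Println((uint64(1) << 32) / chunkBytes / 8) // 128 bytes
	// Heap covered by one mapped index page: pageBytes*8 chunk bits.
	fmt.Println(uint64(pageBytes) * 8 * chunkBytes) // 137438953472 bytes = 128 GiB
}
```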
For #48409.
Change-Id: I72b7e74365046b18c64a6417224c5d85511194fb
Reviewed-on: https://go-review.googlesource.com/c/go/+/399474
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Diffstat (limited to 'src/runtime/mpagealloc_64bit.go')
| -rw-r--r-- | src/runtime/mpagealloc_64bit.go | 79 |
1 file changed, 78 insertions, 1 deletion
```diff
diff --git a/src/runtime/mpagealloc_64bit.go b/src/runtime/mpagealloc_64bit.go
index 0b99209d99..bfc3e0ad90 100644
--- a/src/runtime/mpagealloc_64bit.go
+++ b/src/runtime/mpagealloc_64bit.go
@@ -6,7 +6,10 @@
 
 package runtime
 
-import "unsafe"
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
 
 const (
 	// The number of levels in the radix tree.
@@ -83,6 +86,12 @@ func (p *pageAlloc) sysInit() {
 		sl := notInHeapSlice{(*notInHeap)(r), 0, entries}
 		p.summary[l] = *(*[]pallocSum)(unsafe.Pointer(&sl))
 	}
+
+	// Set up the scavenge index.
+	nbytes := uintptr(1<<heapAddrBits) / pallocChunkBytes / 8
+	r := sysReserve(nil, nbytes)
+	sl := notInHeapSlice{(*notInHeap)(r), int(nbytes), int(nbytes)}
+	p.scav.index.chunks = *(*[]atomic.Uint8)(unsafe.Pointer(&sl))
 }
 
 // sysGrow performs architecture-dependent operations on heap
@@ -177,4 +186,72 @@ func (p *pageAlloc) sysGrow(base, limit uintptr) {
 		sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size())
 		p.summaryMappedReady += need.size()
 	}
+
+	// Update the scavenge index.
+	p.summaryMappedReady += p.scav.index.grow(base, limit, p.sysStat)
 }
+
+// grow increases the index's backing store in response to a heap growth.
+//
+// Returns the amount of memory added to sysStat.
+func (s *scavengeIndex) grow(base, limit uintptr, sysStat *sysMemStat) uintptr {
+	if base%pallocChunkBytes != 0 || limit%pallocChunkBytes != 0 {
+		print("runtime: base = ", hex(base), ", limit = ", hex(limit), "\n")
+		throw("sysGrow bounds not aligned to pallocChunkBytes")
+	}
+	// Map and commit the pieces of chunks that we need.
+	//
+	// We always map the full range of the minimum heap address to the
+	// maximum heap address. We don't do this for the summary structure
+	// because it's quite large and a discontiguous heap could cause a
+	// lot of memory to be used. In this situation, the worst case overhead
+	// is in the single-digit MiB if we map the whole thing.
+	//
+	// The base address of the backing store is always page-aligned,
+	// because it comes from the OS, so it's sufficient to align the
+	// index.
+	haveMin := s.min.Load()
+	haveMax := s.max.Load()
+	needMin := int32(alignDown(uintptr(chunkIndex(base)/8), physPageSize))
+	needMax := int32(alignUp(uintptr((chunkIndex(limit)+7)/8), physPageSize))
+	// Extend the range down to what we have, if there's no overlap.
+	if needMax < haveMin {
+		needMax = haveMin
+	}
+	if needMin > haveMax {
+		needMin = haveMax
+	}
+	have := makeAddrRange(
+		// Avoid a panic from indexing one past the last element.
+		uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMin),
+		uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(haveMax),
+	)
+	need := makeAddrRange(
+		// Avoid a panic from indexing one past the last element.
+		uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMin),
+		uintptr(unsafe.Pointer(&s.chunks[0]))+uintptr(needMax),
+	)
+	// Subtract any overlap from rounding. We can't re-map memory because
+	// it'll be zeroed.
+	need = need.subtract(have)
+
+	// If we've got something to map, map it, and update the slice bounds.
+	if need.size() != 0 {
+		sysMap(unsafe.Pointer(need.base.addr()), need.size(), sysStat)
+		sysUsed(unsafe.Pointer(need.base.addr()), need.size(), need.size())
+		// Update the indices only after the new memory is valid.
+		if haveMin == 0 || needMin < haveMin {
+			s.min.Store(needMin)
+		}
+		if haveMax == 0 || needMax > haveMax {
+			s.max.Store(needMax)
+		}
+	}
+	// Update minHeapIdx. Note that even if there's no mapping work to do,
+	// we may still have a new, lower minimum heap address.
+	minHeapIdx := s.minHeapIdx.Load()
+	if baseIdx := int32(chunkIndex(base) / 8); minHeapIdx == 0 || baseIdx < minHeapIdx {
+		s.minHeapIdx.Store(baseIdx)
+	}
+	return need.size()
+}
```
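For reference, the alignment math in grow falls out of the bitmap layout: chunk i is tracked by bit i%8 of index byte i/8, so a heap range [base, limit) needs index bytes chunkIndex(base)/8 through (chunkIndex(limit)+7)/8, rounded out to whole physical pages before mapping. A simplified illustration of just that step (it omits grow's clamping against the already-mapped [min, max) range; the parameter names stand in for pallocChunkBytes and physPageSize):

```go
package scavsketch

// indexByteRange computes which byte range of the scavenge index covers
// the heap range [base, limit), rounded out to whole pages so it can be
// handed to something like sysMap.
func indexByteRange(base, limit, chunkBytes, pageSize uintptr) (lo, hi uintptr) {
	lo = (base / chunkBytes) / 8                // byte holding the first chunk's bit
	hi = (limit/chunkBytes + 7) / 8             // one past the byte holding the last bit
	lo = lo &^ (pageSize - 1)                   // alignDown(lo, pageSize)
	hi = (hi + pageSize - 1) &^ (pageSize - 1)  // alignUp(hi, pageSize)
	return lo, hi
}
```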
