From acf3ff2e8a0ee777a35b42879c90a1d5a130988f Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek <mknyszek@google.com> Date: Thu, 14 Nov 2019 23:58:50 +0000 Subject: runtime: convert page allocator bitmap to sparse array Currently the page allocator bitmap is implemented as a single giant memory mapping which is reserved at init time and committed as needed. This causes problems on systems that don't handle large uncommitted mappings well, or institute low virtual address space defaults as a memory limiting mechanism. This change modifies the implementation of the page allocator bitmap away from a directly-mapped set of bytes to a sparse array in the same vein as mheap.arenas. This will hurt performance a little but the biggest gains are from the lockless allocation possible with the page allocator, so the impact of this extra layer of indirection should be minimal. In fact, this is exactly what we see: https://perf.golang.org/search?q=upload:20191125.5 This reduces the amount of mapped (PROT_NONE) memory needed on systems with 48-bit address spaces to ~600 MiB down from almost 9 GiB. The bulk of this remaining memory is used by the summaries. Go processes with 32-bit address spaces now always commit to 128 KiB of memory for the bitmap. Previously it would only commit the pages in the bitmap which represented the range of addresses (lowest address to highest address, even if there are unused regions in that range) used by the heap. Updates #35568. Updates #35451.
Change-Id: I0ff10380156568642b80c366001eefd0a4e6c762 Reviewed-on: https://go-review.googlesource.com/c/go/+/207497 Run-TryBot: Michael Knyszek TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/runtime/mpagealloc.go | 148 +++++++++++++++++++++++++++++----------------- 1 file changed, 94 insertions(+), 54 deletions(-) (limited to 'src/runtime/mpagealloc.go') diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 21ea6a8525..f48b9faec3 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -9,9 +9,8 @@ // // Pages are managed using a bitmap that is sharded into chunks. // In the bitmap, 1 means in-use, and 0 means free. The bitmap spans the -// process's address space. Chunks are allocated using a SLAB allocator -// and pointers to chunks are managed in one large array, which is mapped -// in as needed. +// process's address space. Chunks are managed in a sparse-array-style structure +// similar to mheap.arenas, since the bitmap may be large on some systems. // // The bitmap is efficiently searched by using a radix tree in combination // with fast bit-wise intrinsics. Allocation is performed using an address-ordered @@ -49,6 +48,7 @@ package runtime import ( + "runtime/internal/atomic" "unsafe" ) @@ -74,6 +74,14 @@ const ( summaryLevelBits = 3 summaryL0Bits = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits + // pallocChunksL2Bits is the number of bits of the chunk index number + // covered by the second level of the chunks map. + // + // See (*pageAlloc).chunks for more details. Update the documentation + // there should this change. + pallocChunksL2Bits = heapAddrBits - logPallocChunkBytes - pallocChunksL1Bits + pallocChunksL1Shift = pallocChunksL2Bits + // Maximum searchAddr value, which indicates that the heap has no free space. 
// // We subtract arenaBaseOffset because we want this to represent the maximum @@ -111,6 +119,26 @@ func chunkPageIndex(p uintptr) uint { return uint(p % pallocChunkBytes / pageSize) } +// l1 returns the index into the first level of (*pageAlloc).chunks. +func (i chunkIdx) l1() uint { + if pallocChunksL1Bits == 0 { + // Let the compiler optimize this away if there's no + // L1 map. + return 0 + } else { + return uint(i) >> pallocChunksL1Shift + } +} + +// l2 returns the index into the second level of (*pageAlloc).chunks. +func (i chunkIdx) l2() uint { + if pallocChunksL1Bits == 0 { + return uint(i) + } else { + return uint(i) & (1<<pallocChunksL2Bits - 1) + } +} [NOTE: a span of this patch was lost during extraction — everything between the '<' in the expression above and the next '>' was stripped as if it were an HTML tag. The missing hunks included the new two-level sparse `chunks` array field and its documentation, the init() changes, and the chunkOf helper; consult the original change for the full diff.] if end > s.end { s.end = end - - // s.end corresponds directly to the length of s.chunks, - // so just update it here. - s.chunks = s.chunks[:end] } - // Extend the mapped part of the chunk reservation. - elemSize := unsafe.Sizeof(s.chunks[0]) - extendMappedRegion( - unsafe.Pointer(&s.chunks[0]), - uintptr(oldStart)*elemSize, - uintptr(oldEnd)*elemSize, - uintptr(s.start)*elemSize, - uintptr(s.end)*elemSize, - s.sysStat, - ) - // A grow operation is a lot like a free operation, so if our // chunk ends up below the (linearized) s.searchAddr, update // s.searchAddr to the new address, just like in free. @@ -364,11 +389,21 @@ func (s *pageAlloc) grow(base, size uintptr) { s.searchAddr = base } - // Newly-grown memory is always considered scavenged. + // Add entries into chunks, which is sparse, if needed. Then, + // initialize the bitmap. + // + // Newly-grown memory is always considered scavenged. // Set all the bits in the scavenged bitmaps high. for c := chunkIndex(base); c < chunkIndex(limit); c++ { + if s.chunks[c.l1()] == nil { + // Create the necessary l2 entry. + // + // Store it atomically to avoid races with readers which + // don't acquire the heap lock.
+ r := sysAlloc(unsafe.Sizeof(*s.chunks[0]), s.sysStat) + atomic.StorepNoWB(unsafe.Pointer(&s.chunks[c.l1()]), r) + } + s.chunkOf(c).scavenged.setRange(0, pallocChunkPages) } // Update summaries accordingly. The grow acts like a free, so @@ -395,7 +430,7 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // Fast path: the allocation doesn't span more than one chunk, // so update this one and if the summary didn't change, return. x := s.summary[len(s.summary)-1][sc] - y := s.chunks[sc].summarize() + y := s.chunkOf(sc).summarize() if x == y { return } @@ -406,7 +441,7 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { summary := s.summary[len(s.summary)-1] // Update the summary for chunk sc. - summary[sc] = s.chunks[sc].summarize() + summary[sc] = s.chunkOf(sc).summarize() // Update the summaries for chunks in between, which are // either totally allocated or freed. @@ -423,7 +458,7 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { } // Update the summary for chunk ec. - summary[ec] = s.chunks[ec].summarize() + summary[ec] = s.chunkOf(ec).summarize() } else { // Slow general path: the allocation spans more than one chunk // and at least one summary is guaranteed to change. @@ -432,7 +467,7 @@ func (s *pageAlloc) update(base, npages uintptr, contig, alloc bool) { // every chunk in the range and manually recompute the summary. summary := s.summary[len(s.summary)-1] for c := sc; c <= ec; c++ { - summary[c] = s.chunks[c].summarize() + summary[c] = s.chunkOf(c).summarize() } } @@ -479,18 +514,22 @@ func (s *pageAlloc) allocRange(base, npages uintptr) uintptr { scav := uint(0) if sc == ec { // The range doesn't cross any chunk boundaries. - scav += s.chunks[sc].scavenged.popcntRange(si, ei+1-si) - s.chunks[sc].allocRange(si, ei+1-si) + chunk := s.chunkOf(sc) + scav += chunk.scavenged.popcntRange(si, ei+1-si) + chunk.allocRange(si, ei+1-si) } else { // The range crosses at least one chunk boundary. 
- scav += s.chunks[sc].scavenged.popcntRange(si, pallocChunkPages-si) - s.chunks[sc].allocRange(si, pallocChunkPages-si) + chunk := s.chunkOf(sc) + scav += chunk.scavenged.popcntRange(si, pallocChunkPages-si) + chunk.allocRange(si, pallocChunkPages-si) for c := sc + 1; c < ec; c++ { - scav += s.chunks[c].scavenged.popcntRange(0, pallocChunkPages) - s.chunks[c].allocAll() + chunk := s.chunkOf(c) + scav += chunk.scavenged.popcntRange(0, pallocChunkPages) + chunk.allocAll() } - scav += s.chunks[ec].scavenged.popcntRange(0, ei+1) - s.chunks[ec].allocRange(0, ei+1) + chunk = s.chunkOf(ec) + scav += chunk.scavenged.popcntRange(0, ei+1) + chunk.allocRange(0, ei+1) } s.update(base, npages, true, true) return uintptr(scav) * pageSize @@ -702,7 +741,7 @@ nextLevel: // After iterating over all levels, i must contain a chunk index which // is what the final level represents. ci := chunkIdx(i) - j, searchIdx := s.chunks[ci].find(npages, 0) + j, searchIdx := s.chunkOf(ci).find(npages, 0) if j < 0 { // We couldn't find any space in this chunk despite the summaries telling // us it should be there. There's likely a bug, so dump some state and throw. @@ -744,7 +783,7 @@ func (s *pageAlloc) alloc(npages uintptr) (addr uintptr, scav uintptr) { // npages is guaranteed to be no greater than pallocChunkPages here. i := chunkIndex(s.searchAddr) if max := s.summary[len(s.summary)-1][i].max(); max >= uint(npages) { - j, searchIdx := s.chunks[i].find(npages, chunkPageIndex(s.searchAddr)) + j, searchIdx := s.chunkOf(i).find(npages, chunkPageIndex(s.searchAddr)) if j < 0 { print("runtime: max = ", max, ", npages = ", npages, "\n") print("runtime: searchIdx = ", chunkPageIndex(s.searchAddr), ", s.searchAddr = ", hex(s.searchAddr), "\n") @@ -793,7 +832,8 @@ func (s *pageAlloc) free(base, npages uintptr) { if npages == 1 { // Fast path: we're clearing a single bit, and we know exactly // where it is, so mark it directly. 
- s.chunks[chunkIndex(base)].free1(chunkPageIndex(base)) + i := chunkIndex(base) + s.chunkOf(i).free1(chunkPageIndex(base)) } else { // Slow path: we're clearing more bits so we may need to iterate. limit := base + npages*pageSize - 1 @@ -802,14 +842,14 @@ func (s *pageAlloc) free(base, npages uintptr) { if sc == ec { // The range doesn't cross any chunk boundaries. - s.chunks[sc].free(si, ei+1-si) + s.chunkOf(sc).free(si, ei+1-si) } else { // The range crosses at least one chunk boundary. - s.chunks[sc].free(si, pallocChunkPages-si) + s.chunkOf(sc).free(si, pallocChunkPages-si) for c := sc + 1; c < ec; c++ { - s.chunks[c].freeAll() + s.chunkOf(c).freeAll() } - s.chunks[ec].free(0, ei+1) + s.chunkOf(ec).free(0, ei+1) } } s.update(base, npages, true, false) -- cgit v1.3