diff options
| author | Austin Clements <austin@google.com> | 2018-02-22 20:38:09 -0500 |
|---|---|---|
| committer | Austin Clements <austin@google.com> | 2018-02-23 21:59:50 +0000 |
| commit | ec25210564562571aeb39cdfd6e02270d7f3fb1d (patch) | |
| tree | 1ab960cd3b77148f8020fee2303f0599e5686a23 /src/runtime/mbitmap.go | |
| parent | 2dbf15e88ea33c04ccc1d0762b2cfcb3bfd8a039 (diff) | |
| download | go-ec25210564562571aeb39cdfd6e02270d7f3fb1d.tar.xz | |
runtime: support a two-level arena map
Currently, the heap arena map is a single, large array that covers
every possible arena frame in the entire address space. This is
practical up to about 48 bits of address space with 64 MB arenas.
However, there are two problems with this:
1. mips64, ppc64, and s390x support full 64-bit address spaces (though
on Linux only s390x has kernel support for 64-bit address spaces).
On these platforms, it would be good to support these larger
address spaces.
2. On Windows, processes are charged for untouched memory, so for
processes with small heaps, the mostly-untouched 32 MB arena map
plus a 64 MB arena are significant overhead. Hence, it would be
good to reduce both the arena map size and the arena size, but with
a single-level arena, these are inversely proportional.
This CL adds support for a two-level arena map. Arena frame numbers
are now divided into arenaL1Bits of L1 index and arenaL2Bits of L2
index.
At the moment, arenaL1Bits is always 0, so we effectively have a
single level map. We do a few things so that this has no cost beyond
the current single-level map:
1. We embed the L2 array directly in mheap, so if there's a single
entry in the L2 array, the representation is identical to the
current representation and there's no extra level of indirection.
2. Hot code that accesses the arena map is structured so that it
optimizes to nearly the same machine code as it does currently.
3. We make some small tweaks to hot code paths and to the inliner
itself to keep some important functions inlined despite their
now-larger ASTs. In particular, this is necessary for
heapBitsForAddr and heapBits.next.
Possibly as a result of some of the tweaks, this actually slightly
improves the performance of the x/benchmarks garbage benchmark:
name old time/op new time/op delta
Garbage/benchmem-MB=64-12 2.28ms ± 1% 2.26ms ± 1% -1.07% (p=0.000 n=17+19)
(https://perf.golang.org/search?q=upload:20180223.2)
For #23900.
Change-Id: If5164e0961754f97eb9eca58f837f36d759505ff
Reviewed-on: https://go-review.googlesource.com/96779
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Diffstat (limited to 'src/runtime/mbitmap.go')
| -rw-r--r-- | src/runtime/mbitmap.go | 75 |
1 file changed, 49 insertions, 26 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 85d79c685b..294e3739b7 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -332,21 +332,23 @@ func (m *markBits) advance() { // // nosplit because it is used during write barriers and must not be preempted. //go:nosplit -func heapBitsForAddr(addr uintptr) heapBits { +func heapBitsForAddr(addr uintptr) (h heapBits) { // 2 bits per word, 4 pairs per byte, and a mask is hard coded. - off := addr / sys.PtrSize arena := arenaIndex(addr) - ha := mheap_.arenas[arena] + ha := mheap_.arenas[arena.l1()][arena.l2()] // The compiler uses a load for nil checking ha, but in this // case we'll almost never hit that cache line again, so it // makes more sense to do a value check. if ha == nil { - // addr is not in the heap. Crash without inhibiting inlining. - _ = *ha + // addr is not in the heap. Return nil heapBits, which + // we expect to crash in the caller. + return } - bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes] - last := &ha.bitmap[len(ha.bitmap)-1] - return heapBits{bitp, uint32(off & 3), uint32(arena), last} + h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes] + h.shift = uint32((addr / sys.PtrSize) & 3) + h.arena = uint32(arena) + h.last = &ha.bitmap[len(ha.bitmap)-1] + return } // findObject returns the base address for the heap object containing @@ -432,21 +434,39 @@ func (h heapBits) next() heapBits { h.bitp, h.shift = add1(h.bitp), 0 } else { // Move to the next arena. - h.arena++ - a := mheap_.arenas[h.arena] - if a == nil { - // We just passed the end of the object, which - // was also the end of the heap. Poison h. It - // should never be dereferenced at this point. - h.bitp, h.last = nil, nil - } else { - h.bitp, h.shift = &a.bitmap[0], 0 - h.last = &a.bitmap[len(a.bitmap)-1] - } + return h.nextArena() } return h } +// nextArena advances h to the beginning of the next heap arena. +// +// This is a slow-path helper to next. 
gc's inliner knows that +// heapBits.next can be inlined even though it calls this. This is +// marked noinline so it doesn't get inlined into next and cause next +// to be too big to inline. +// +//go:nosplit +//go:noinline +func (h heapBits) nextArena() heapBits { + h.arena++ + ai := arenaIdx(h.arena) + l2 := mheap_.arenas[ai.l1()] + if l2 == nil { + // We just passed the end of the object, which + // was also the end of the heap. Poison h. It + // should never be dereferenced at this point. + return heapBits{} + } + ha := l2[ai.l2()] + if ha == nil { + return heapBits{} + } + h.bitp, h.shift = &ha.bitmap[0], 0 + h.last = &ha.bitmap[len(ha.bitmap)-1] + return h +} + // forward returns the heapBits describing n pointer-sized words ahead of h in memory. // That is, if h describes address p, h.forward(n) describes p+n*ptrSize. // h.forward(1) is equivalent to h.next(), just slower. @@ -465,12 +485,13 @@ func (h heapBits) forward(n uintptr) heapBits { // We're in a new heap arena. past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1) h.arena += 1 + uint32(past/heapArenaBitmapBytes) - a := mheap_.arenas[h.arena] - if a == nil { - h.bitp, h.last = nil, nil - } else { + ai := arenaIdx(h.arena) + if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil { + a := l2[ai.l2()] h.bitp = &a.bitmap[past%heapArenaBitmapBytes] h.last = &a.bitmap[len(a.bitmap)-1] + } else { + h.bitp, h.last = nil, nil } return h } @@ -971,7 +992,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // machine instructions. outOfPlace := false - if arenaIndex(x+size-1) != uint(h.arena) { + if arenaIndex(x+size-1) != arenaIdx(h.arena) { // This object spans heap arenas, so the bitmap may be // discontiguous. Unroll it into the object instead // and then copy it out. @@ -1375,12 +1396,14 @@ Phase4: // x+size may not point to the heap, so back up one // word and then call next(). 
end := heapBitsForAddr(x + size - sys.PtrSize).next() - if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) { + endAI := arenaIdx(end.arena) + if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) { // The unrolling code above walks hbitp just // past the bitmap without moving to the next // arena. Synthesize this for end.bitp. - end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes) end.arena-- + endAI = arenaIdx(end.arena) + end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes) end.last = nil } if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) { |
