aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/mbitmap.go
diff options
context:
space:
mode:
authorAustin Clements <austin@google.com>2018-02-22 20:38:09 -0500
committerAustin Clements <austin@google.com>2018-02-23 21:59:50 +0000
commitec25210564562571aeb39cdfd6e02270d7f3fb1d (patch)
tree1ab960cd3b77148f8020fee2303f0599e5686a23 /src/runtime/mbitmap.go
parent2dbf15e88ea33c04ccc1d0762b2cfcb3bfd8a039 (diff)
downloadgo-ec25210564562571aeb39cdfd6e02270d7f3fb1d.tar.xz
runtime: support a two-level arena map
Currently, the heap arena map is a single, large array that covers every possible arena frame in the entire address space. This is practical up to about 48 bits of address space with 64 MB arenas. However, there are two problems with this: 1. mips64, ppc64, and s390x support full 64-bit address spaces (though on Linux only s390x has kernel support for 64-bit address spaces). On these platforms, it would be good to support these larger address spaces. 2. On Windows, processes are charged for untouched memory, so for processes with small heaps, the mostly-untouched 32 MB arena map plus a 64 MB arena are significant overhead. Hence, it would be good to reduce both the arena map size and the arena size, but with a single-level arena, these are inversely proportional. This CL adds support for a two-level arena map. Arena frame numbers are now divided into arenaL1Bits of L1 index and arenaL2Bits of L2 index. At the moment, arenaL1Bits is always 0, so we effectively have a single level map. We do a few things so that this has no cost beyond the current single-level map: 1. We embed the L2 array directly in mheap, so if there's a single entry in the L2 array, the representation is identical to the current representation and there's no extra level of indirection. 2. Hot code that accesses the arena map is structured so that it optimizes to nearly the same machine code as it does currently. 3. We make some small tweaks to hot code paths and to the inliner itself to keep some important functions inlined despite their now-larger ASTs. In particular, this is necessary for heapBitsForAddr and heapBits.next. Possibly as a result of some of the tweaks, this actually slightly improves the performance of the x/benchmarks garbage benchmark: name old time/op new time/op delta Garbage/benchmem-MB=64-12 2.28ms ± 1% 2.26ms ± 1% -1.07% (p=0.000 n=17+19) (https://perf.golang.org/search?q=upload:20180223.2) For #23900. Change-Id: If5164e0961754f97eb9eca58f837f36d759505ff Reviewed-on: https://go-review.googlesource.com/96779 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Rick Hudson <rlh@golang.org>
Diffstat (limited to 'src/runtime/mbitmap.go')
-rw-r--r--src/runtime/mbitmap.go75
1 files changed, 49 insertions, 26 deletions
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 85d79c685b..294e3739b7 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -332,21 +332,23 @@ func (m *markBits) advance() {
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
-func heapBitsForAddr(addr uintptr) heapBits {
+func heapBitsForAddr(addr uintptr) (h heapBits) {
// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
- off := addr / sys.PtrSize
arena := arenaIndex(addr)
- ha := mheap_.arenas[arena]
+ ha := mheap_.arenas[arena.l1()][arena.l2()]
// The compiler uses a load for nil checking ha, but in this
// case we'll almost never hit that cache line again, so it
// makes more sense to do a value check.
if ha == nil {
- // addr is not in the heap. Crash without inhibiting inlining.
- _ = *ha
+ // addr is not in the heap. Return nil heapBits, which
+ // we expect to crash in the caller.
+ return
}
- bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
- last := &ha.bitmap[len(ha.bitmap)-1]
- return heapBits{bitp, uint32(off & 3), uint32(arena), last}
+ h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
+ h.shift = uint32((addr / sys.PtrSize) & 3)
+ h.arena = uint32(arena)
+ h.last = &ha.bitmap[len(ha.bitmap)-1]
+ return
}
// findObject returns the base address for the heap object containing
@@ -432,21 +434,39 @@ func (h heapBits) next() heapBits {
h.bitp, h.shift = add1(h.bitp), 0
} else {
// Move to the next arena.
- h.arena++
- a := mheap_.arenas[h.arena]
- if a == nil {
- // We just passed the end of the object, which
- // was also the end of the heap. Poison h. It
- // should never be dereferenced at this point.
- h.bitp, h.last = nil, nil
- } else {
- h.bitp, h.shift = &a.bitmap[0], 0
- h.last = &a.bitmap[len(a.bitmap)-1]
- }
+ return h.nextArena()
}
return h
}
+// nextArena advances h to the beginning of the next heap arena.
+//
+// This is a slow-path helper to next. gc's inliner knows that
+// heapBits.next can be inlined even though it calls this. This is
+// marked noinline so it doesn't get inlined into next and cause next
+// to be too big to inline.
+//
+//go:nosplit
+//go:noinline
+func (h heapBits) nextArena() heapBits {
+ h.arena++
+ ai := arenaIdx(h.arena)
+ l2 := mheap_.arenas[ai.l1()]
+ if l2 == nil {
+ // We just passed the end of the object, which
+ // was also the end of the heap. Poison h. It
+ // should never be dereferenced at this point.
+ return heapBits{}
+ }
+ ha := l2[ai.l2()]
+ if ha == nil {
+ return heapBits{}
+ }
+ h.bitp, h.shift = &ha.bitmap[0], 0
+ h.last = &ha.bitmap[len(ha.bitmap)-1]
+ return h
+}
+
// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
@@ -465,12 +485,13 @@ func (h heapBits) forward(n uintptr) heapBits {
// We're in a new heap arena.
past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
h.arena += 1 + uint32(past/heapArenaBitmapBytes)
- a := mheap_.arenas[h.arena]
- if a == nil {
- h.bitp, h.last = nil, nil
- } else {
+ ai := arenaIdx(h.arena)
+ if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
+ a := l2[ai.l2()]
h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
h.last = &a.bitmap[len(a.bitmap)-1]
+ } else {
+ h.bitp, h.last = nil, nil
}
return h
}
@@ -971,7 +992,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// machine instructions.
outOfPlace := false
- if arenaIndex(x+size-1) != uint(h.arena) {
+ if arenaIndex(x+size-1) != arenaIdx(h.arena) {
// This object spans heap arenas, so the bitmap may be
// discontiguous. Unroll it into the object instead
// and then copy it out.
@@ -1375,12 +1396,14 @@ Phase4:
// x+size may not point to the heap, so back up one
// word and then call next().
end := heapBitsForAddr(x + size - sys.PtrSize).next()
- if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
+ endAI := arenaIdx(end.arena)
+ if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) {
// The unrolling code above walks hbitp just
// past the bitmap without moving to the next
// arena. Synthesize this for end.bitp.
- end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
end.arena--
+ endAI = arenaIdx(end.arena)
+ end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes)
end.last = nil
}
if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {