Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/heapdump.go |  12
-rw-r--r--  src/runtime/malloc.go   |  77
-rw-r--r--  src/runtime/mbitmap.go  |  75
-rw-r--r--  src/runtime/mheap.go    |  88
4 files changed, 182 insertions, 70 deletions
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
index dbeaed9277..b255cbbae3 100644
--- a/src/runtime/heapdump.go
+++ b/src/runtime/heapdump.go
@@ -489,9 +489,15 @@ func dumpparams() {
 	}
 	dumpint(sys.PtrSize)
 	var arenaStart, arenaEnd uintptr
-	for i, ha := range mheap_.arenas {
-		if ha != nil {
-			base := arenaBase(uint(i))
+	for i1 := range mheap_.arenas {
+		if mheap_.arenas[i1] == nil {
+			continue
+		}
+		for i, ha := range mheap_.arenas[i1] {
+			if ha == nil {
+				continue
+			}
+			base := arenaBase(arenaIdx(i1)<<arenaL1Shift | arenaIdx(i))
 			if arenaStart == 0 || base < arenaStart {
 				arenaStart = base
 			}
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 6f78455c8b..bad35116b0 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -92,8 +92,10 @@
 // Since arenas are aligned, the address space can be viewed as a
 // series of arena frames. The arena map (mheap_.arenas) maps from
 // arena frame number to *heapArena, or nil for parts of the address
-// space not backed by the Go heap. Since arenas are large, the arena
-// index is just a single-level mapping.
+// space not backed by the Go heap. The arena map is structured as a
+// two-level array consisting of a "L1" arena map and many "L2" arena
+// maps; however, since arenas are large, on many architectures, the
+// arena map consists of a single, large L2 map.
 //
 // The arena map covers the entire possible address space, allowing
 // the Go heap to use any part of the address space. The allocator
@@ -202,11 +204,6 @@ const (
 	// space because doing so is cheap.
 	// mips32 only has access to the low 2GB of virtual memory, so
 	// we further limit it to 31 bits.
-	//
-	// The size of the arena map is proportional to
-	// 1<<heapAddrBits, so it's important that this not be too
-	// large. 48 bits is about the threshold; above that we would
-	// need to go to a two level arena map.
 	heapAddrBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
 
 	// maxAlloc is the maximum size of an allocation. On 64-bit,
@@ -219,13 +216,49 @@ const (
 	// heapArenaBytes is the size of a heap arena. The heap
 	// consists of mappings of size heapArenaBytes, aligned to
 	// heapArenaBytes. The initial heap mapping is one arena.
-	heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
+	//
+	// This is currently 64MB on 64-bit and 4MB on 32-bit.
+	heapArenaBytes = 1 << logHeapArenaBytes
+
+	// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
+	// prefer using heapArenaBytes where possible (we need the
+	// constant to compute some other constants).
+	logHeapArenaBytes = (6+20)*_64bit + (2+20)*(1-_64bit)
 
 	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
 	heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
 
 	pagesPerArena = heapArenaBytes / pageSize
 
+	// arenaL1Bits is the number of bits of the arena number
+	// covered by the first level arena map.
+	//
+	// This number should be small, since the first level arena
+	// map requires PtrSize*(1<<arenaL1Bits) of space in the
+	// binary's BSS. It can be zero, in which case the first level
+	// index is effectively unused. There is a performance benefit
+	// to this, since the generated code can be more efficient,
+	// but comes at the cost of having a large L2 mapping.
+	arenaL1Bits = 0
+
+	// arenaL2Bits is the number of bits of the arena number
+	// covered by the second level arena index.
+	//
+	// The size of each arena map allocation is proportional to
+	// 1<<arenaL2Bits, so it's important that this not be too
+	// large. 48 bits leads to 32MB arena index allocations, which
+	// is about the practical threshold.
+	arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits
+
+	// arenaL1Shift is the number of bits to shift an arena frame
+	// number by to compute an index into the first level arena map.
+	arenaL1Shift = arenaL2Bits
+
+	// arenaBits is the total bits in a combined arena map index.
+	// This is split between the index into the L1 arena map and
+	// the L2 arena map.
+	arenaBits = arenaL1Bits + arenaL2Bits
+
 	// arenaBaseOffset is the pointer value that corresponds to
 	// index 0 in the heap arena map.
 	//
@@ -323,12 +356,6 @@ func mallocinit() {
 		throw("bad system page size")
 	}
 
-	// Map the arena map. Most of this will never be written to,
-	mheap_.arenas = (*[(1 << heapAddrBits) / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, nil))
-	if mheap_.arenas == nil {
-		throw("failed to allocate arena map")
-	}
-
 	// Initialize the heap.
 	mheap_.init()
 	_g_ := getg()
@@ -398,7 +425,7 @@ func mallocinit() {
 		// 3. We try to stake out a reasonably large initial
 		// heap reservation.
 
-		const arenaMetaSize = unsafe.Sizeof(heapArena{}) * uintptr(len(*mheap_.arenas))
+		const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{})
 		meta := uintptr(sysReserve(nil, arenaMetaSize))
 		if meta != 0 {
 			mheap_.heapArenaAlloc.init(meta, arenaMetaSize)
@@ -476,7 +503,7 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 		if p+n < p {
 			// We can't use this, so don't ask.
 			v = nil
-		} else if arenaIndex(p+n-1) >= uint(len(mheap_.arenas)) {
+		} else if arenaIndex(p+n-1) >= 1<<arenaBits {
 			// Outside addressable heap. Can't use.
 			v = nil
 		} else {
@@ -528,9 +555,9 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 		p := uintptr(v)
 		if p+size < p {
 			bad = "region exceeds uintptr range"
-		} else if arenaIndex(p) >= uint(len(mheap_.arenas)) {
+		} else if arenaIndex(p) >= 1<<arenaBits {
 			bad = "base outside usable address space"
-		} else if arenaIndex(p+size-1) >= uint(len(mheap_.arenas)) {
+		} else if arenaIndex(p+size-1) >= 1<<arenaBits {
 			bad = "end outside usable address space"
 		}
 		if bad != "" {
@@ -551,7 +578,17 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 mapped:
 	// Create arena metadata.
 	for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ {
-		if h.arenas[ri] != nil {
+		l2 := h.arenas[ri.l1()]
+		if l2 == nil {
+			// Allocate an L2 arena map.
+			l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil))
+			if l2 == nil {
+				throw("out of memory allocating heap arena map")
+			}
+			atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2))
+		}
+
+		if l2[ri.l2()] != nil {
 			throw("arena already initialized")
 		}
 		var r *heapArena
@@ -567,7 +604,7 @@ mapped:
 		// new heap arena becomes visible before the heap lock
 		// is released (which shouldn't happen, but there's
 		// little downside to this).
-		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
+		atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r))
 	}
 
 	// Tell the race detector about the new heap memory.
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 85d79c685b..294e3739b7 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -332,21 +332,23 @@ func (m *markBits) advance() {
 //
 // nosplit because it is used during write barriers and must not be preempted.
 //go:nosplit
-func heapBitsForAddr(addr uintptr) heapBits {
+func heapBitsForAddr(addr uintptr) (h heapBits) {
 	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
-	off := addr / sys.PtrSize
 	arena := arenaIndex(addr)
-	ha := mheap_.arenas[arena]
+	ha := mheap_.arenas[arena.l1()][arena.l2()]
 	// The compiler uses a load for nil checking ha, but in this
 	// case we'll almost never hit that cache line again, so it
 	// makes more sense to do a value check.
 	if ha == nil {
-		// addr is not in the heap. Crash without inhibiting inlining.
-		_ = *ha
+		// addr is not in the heap. Return nil heapBits, which
+		// we expect to crash in the caller.
+		return
 	}
-	bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
-	last := &ha.bitmap[len(ha.bitmap)-1]
-	return heapBits{bitp, uint32(off & 3), uint32(arena), last}
+	h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
+	h.shift = uint32((addr / sys.PtrSize) & 3)
+	h.arena = uint32(arena)
+	h.last = &ha.bitmap[len(ha.bitmap)-1]
+	return
 }
 
 // findObject returns the base address for the heap object containing
@@ -432,21 +434,39 @@ func (h heapBits) next() heapBits {
 		h.bitp, h.shift = add1(h.bitp), 0
 	} else {
 		// Move to the next arena.
-		h.arena++
-		a := mheap_.arenas[h.arena]
-		if a == nil {
-			// We just passed the end of the object, which
-			// was also the end of the heap. Poison h. It
-			// should never be dereferenced at this point.
-			h.bitp, h.last = nil, nil
-		} else {
-			h.bitp, h.shift = &a.bitmap[0], 0
-			h.last = &a.bitmap[len(a.bitmap)-1]
-		}
+		return h.nextArena()
 	}
 	return h
 }
 
+// nextArena advances h to the beginning of the next heap arena.
+//
+// This is a slow-path helper to next. gc's inliner knows that
+// heapBits.next can be inlined even though it calls this. This is
+// marked noinline so it doesn't get inlined into next and cause next
+// to be too big to inline.
+//
+//go:nosplit
+//go:noinline
+func (h heapBits) nextArena() heapBits {
+	h.arena++
+	ai := arenaIdx(h.arena)
+	l2 := mheap_.arenas[ai.l1()]
+	if l2 == nil {
+		// We just passed the end of the object, which
+		// was also the end of the heap. Poison h. It
+		// should never be dereferenced at this point.
+		return heapBits{}
+	}
+	ha := l2[ai.l2()]
+	if ha == nil {
+		return heapBits{}
+	}
+	h.bitp, h.shift = &ha.bitmap[0], 0
+	h.last = &ha.bitmap[len(ha.bitmap)-1]
+	return h
+}
+
 // forward returns the heapBits describing n pointer-sized words ahead of h in memory.
 // That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
 // h.forward(1) is equivalent to h.next(), just slower.
@@ -465,12 +485,13 @@ func (h heapBits) forward(n uintptr) heapBits {
 		// We're in a new heap arena.
 		past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
 		h.arena += 1 + uint32(past/heapArenaBitmapBytes)
-		a := mheap_.arenas[h.arena]
-		if a == nil {
-			h.bitp, h.last = nil, nil
-		} else {
+		ai := arenaIdx(h.arena)
+		if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
+			a := l2[ai.l2()]
 			h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
 			h.last = &a.bitmap[len(a.bitmap)-1]
+		} else {
+			h.bitp, h.last = nil, nil
 		}
 	}
 	return h
@@ -971,7 +992,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// machine instructions.
 
 	outOfPlace := false
-	if arenaIndex(x+size-1) != uint(h.arena) {
+	if arenaIndex(x+size-1) != arenaIdx(h.arena) {
 		// This object spans heap arenas, so the bitmap may be
 		// discontiguous. Unroll it into the object instead
 		// and then copy it out.
@@ -1375,12 +1396,14 @@ Phase4:
 		// x+size may not point to the heap, so back up one
 		// word and then call next().
 		end := heapBitsForAddr(x + size - sys.PtrSize).next()
-		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
+		endAI := arenaIdx(end.arena)
+		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) {
 			// The unrolling code above walks hbitp just
 			// past the bitmap without moving to the next
 			// arena. Synthesize this for end.bitp.
-			end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
 			end.arena--
+			endAI = arenaIdx(end.arena)
+			end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes)
 			end.last = nil
 		}
 		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 3460c54d72..b11853ca18 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -96,9 +96,9 @@ type mheap struct {
 	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
 	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
-	// arenas is the heap arena map.
-	// arenas[(va+arenaBaseOffset)/heapArenaBytes] points to the
-	// metadata for the heap arena containing va.
+	// arenas is the heap arena map. It points to the metadata for
+	// the heap for every arena frame of the entire usable virtual
+	// address space.
 	//
 	// Use arenaIndex to compute indexes into this array.
 	//
@@ -110,9 +110,13 @@ type mheap struct {
 	// transition from nil to non-nil at any time when the lock
 	// isn't held. (Entries never transitions back to nil.)
 	//
-	// This structure is fully mapped by mallocinit, so it's safe
-	// to probe any index.
-	arenas *[(1 << heapAddrBits) / heapArenaBytes]*heapArena
+	// In general, this is a two-level mapping consisting of an L1
+	// map and possibly many L2 maps. This saves space when there
+	// are a huge number of arena frames. However, on many
+	// platforms (even 64-bit), arenaL1Bits is 0, making this
+	// effectively a single-level map. In this case, arenas[0]
+	// will never be nil.
+	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena
 
 	// heapArenaAlloc is pre-reserved space for allocating heapArena
 	// objects. This is only used on 32-bit, where we pre-reserve
@@ -410,24 +414,48 @@ func (sc spanClass) noscan() bool {
 	return sc&1 != 0
 }
 
-// arenaIndex returns the mheap_.arenas index of the arena containing
-// metadata for p. If p is outside the range of valid heap addresses,
-// it returns an index larger than len(mheap_.arenas).
+// arenaIndex returns the index into mheap_.arenas of the arena
+// containing metadata for p. This index combines of an index into the
+// L1 map and an index into the L2 map and should be used as
+// mheap_.arenas[ai.l1()][ai.l2()].
+//
+// If p is outside the range of valid heap addresses, either l1() or
+// l2() will be out of bounds.
 //
 // It is nosplit because it's called by spanOf and several other
 // nosplit functions.
 //
 //go:nosplit
-func arenaIndex(p uintptr) uint {
-	return uint((p + arenaBaseOffset) / heapArenaBytes)
+func arenaIndex(p uintptr) arenaIdx {
+	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
 }
 
 // arenaBase returns the low address of the region covered by heap
 // arena i.
-func arenaBase(i uint) uintptr {
+func arenaBase(i arenaIdx) uintptr {
 	return uintptr(i)*heapArenaBytes - arenaBaseOffset
 }
 
+type arenaIdx uint
+
+func (i arenaIdx) l1() uint {
+	if arenaL1Bits == 0 {
+		// Let the compiler optimize this away if there's no
+		// L1 map.
+		return 0
+	} else {
+		return uint(i) >> arenaL1Shift
+	}
+}
+
+func (i arenaIdx) l2() uint {
+	if arenaL1Bits == 0 {
+		return uint(i)
+	} else {
+		return uint(i) & (1<<arenaL2Bits - 1)
+	}
+}
+
 // inheap reports whether b is a pointer into a (potentially dead) heap object.
 // It returns false for pointers into _MSpanManual spans.
 // Non-preemptible because it is used by write barriers.
@@ -467,14 +495,28 @@ func inHeapOrStack(b uintptr) bool {
 //
 //go:nosplit
 func spanOf(p uintptr) *mspan {
-	if p < minLegalPointer {
-		return nil
-	}
+	// This function looks big, but we use a lot of constant
+	// folding around arenaL1Bits to get it under the inlining
+	// budget. Also, many of the checks here are safety checks
+	// that Go needs to do anyway, so the generated code is quite
+	// short.
 	ri := arenaIndex(p)
-	if ri >= uint(len(mheap_.arenas)) {
+	if arenaL1Bits == 0 {
+		// If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
+		if ri.l2() >= uint(len(mheap_.arenas[0])) {
+			return nil
+		}
+	} else {
+		// If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
+		if ri.l1() >= uint(len(mheap_.arenas)) {
+			return nil
+		}
+	}
+	l2 := mheap_.arenas[ri.l1()]
+	if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
 		return nil
 	}
-	ha := mheap_.arenas[ri]
+	ha := l2[ri.l2()]
 	if ha == nil {
 		return nil
 	}
@@ -488,7 +530,8 @@ func spanOf(p uintptr) *mspan {
 //
 //go:nosplit
 func spanOfUnchecked(p uintptr) *mspan {
-	return mheap_.arenas[arenaIndex(p)].spans[(p/pageSize)%pagesPerArena]
+	ai := arenaIndex(p)
+	return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
 }
 
 // spanOfHeap is like spanOf, but returns nil if p does not point to a
@@ -763,18 +806,21 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
 
 // setSpan modifies the span map so spanOf(base) is s.
 func (h *mheap) setSpan(base uintptr, s *mspan) {
-	h.arenas[arenaIndex(base)].spans[(base/pageSize)%pagesPerArena] = s
+	ai := arenaIndex(base)
+	h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s
}
 
 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
 // is s.
 func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 	p := base / pageSize
-	ha := h.arenas[arenaIndex(base)]
+	ai := arenaIndex(base)
+	ha := h.arenas[ai.l1()][ai.l2()]
 	for n := uintptr(0); n < npage; n++ {
 		i := (p + n) % pagesPerArena
 		if i == 0 {
-			ha = h.arenas[arenaIndex(base+n*pageSize)]
+			ai = arenaIndex(base + n*pageSize)
+			ha = h.arenas[ai.l1()][ai.l2()]
 		}
 		ha.spans[i] = s
 	}
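The heart of the change is that an arena frame number is now split into a high "L1" part and a low "L2" part, and a lookup walks mheap_.arenas[ai.l1()][ai.l2()], allocating the L2 map lazily the first time an arena in its range is mapped. The standalone sketch below models that indexing scheme outside the runtime; the constants, the non-zero arenaL1Bits, and the arenaMeta/lookup names are illustrative stand-ins (the real map lives in mheap_, uses persistentalloc, and applies arenaBaseOffset), so it is a model of the scheme rather than runtime code.

package main

import "fmt"

// Constants mirroring the 64-bit layout in the patch (48-bit address
// space, 64MB arenas). arenaL1Bits is 0 in the patch on most platforms;
// it is non-zero here only so both levels are exercised.
const (
	heapAddrBits      = 48
	logHeapArenaBytes = 26 // 64MB arenas
	arenaL1Bits       = 6
	arenaL2Bits       = heapAddrBits - logHeapArenaBytes - arenaL1Bits
	arenaL1Shift      = arenaL2Bits
	heapArenaBytes    = 1 << logHeapArenaBytes
)

// arenaMeta stands in for the runtime's per-arena *heapArena metadata.
type arenaMeta struct{ base uintptr }

// arenas models mheap_.arenas: a small L1 array of pointers to large L2 arrays.
var arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*arenaMeta

// arenaIndex maps an address to its combined arena frame number
// (arenaBaseOffset is omitted in this model).
func arenaIndex(p uintptr) uint { return uint(p >> logHeapArenaBytes) }

// l1 and l2 split a combined index, as arenaIdx.l1/l2 do in the patch.
func l1(i uint) uint { return i >> arenaL1Shift }
func l2(i uint) uint { return i & (1<<arenaL2Bits - 1) }

// lookup walks both levels, returning nil if either level is unmapped.
func lookup(p uintptr) *arenaMeta {
	i := arenaIndex(p)
	l2map := arenas[l1(i)]
	if l2map == nil {
		return nil
	}
	return l2map[l2(i)]
}

func main() {
	p := uintptr(0x7f3a12345678)
	i := arenaIndex(p)
	// Install metadata for p's arena, allocating the L2 map on demand,
	// analogous to what sysAlloc does with persistentalloc in the patch.
	if arenas[l1(i)] == nil {
		arenas[l1(i)] = new([1 << arenaL2Bits]*arenaMeta)
	}
	arenas[l1(i)][l2(i)] = &arenaMeta{base: p &^ (heapArenaBytes - 1)}

	fmt.Printf("index=%#x l1=%#x l2=%#x base=%#x\n", i, l1(i), l2(i), lookup(p).base)
}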

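The arenaL2Bits comment's claim that "48 bits leads to 32MB arena index allocations" is a size calculation: with heapAddrBits = 48, 64MB arenas (logHeapArenaBytes = 26), and arenaL1Bits = 0, the single L2 map has 2^22 pointer-sized entries. A quick check of that arithmetic, using the 64-bit values quoted above (not runtime code):

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	const (
		heapAddrBits      = 48
		logHeapArenaBytes = 26 // 64MB arenas on 64-bit
		arenaL1Bits       = 0  // as in the patch on most platforms
		arenaL2Bits       = heapAddrBits - logHeapArenaBytes - arenaL1Bits
	)
	// With no L1 level, the lone L2 map holds one pointer per possible
	// arena frame: 2^22 entries * 8 bytes = 32MB.
	l2Entries := 1 << arenaL2Bits
	fmt.Printf("L2 entries: %d (2^%d)\n", l2Entries, arenaL2Bits)
	fmt.Printf("L2 map size: %d MB\n", l2Entries*int(unsafe.Sizeof(uintptr(0)))/(1<<20))
}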