Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/malloc.go   |   8
-rw-r--r--  src/runtime/mgcmark.go  | 133
-rw-r--r--  src/runtime/mgcsweep.go |   4
-rw-r--r--  src/runtime/mheap.go    |  82
4 files changed, 193 insertions(+), 34 deletions(-)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index e1ec5e6496..29e0071b3c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -465,6 +465,14 @@ func mallocinit() {
 			physHugePageShift++
 		}
 	}
+	if pagesPerArena%pagesPerSpanRoot != 0 {
+		print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerSpanRoot (", pagesPerSpanRoot, ")\n")
+		throw("bad pagesPerSpanRoot")
+	}
+	if pagesPerArena%pagesPerReclaimerChunk != 0 {
+		print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
+		throw("bad pagesPerReclaimerChunk")
+	}
 
 	// Initialize the heap.
 	mheap_.init()
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 301d8020f1..ea73ccc1b1 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -21,10 +21,6 @@ const (
 	// BSS root.
 	rootBlockBytes = 256 << 10
 
-	// rootBlockSpans is the number of spans to scan per span
-	// root.
-	rootBlockSpans = 8 * 1024 // 64MB worth of spans
-
 	// maxObletBytes is the maximum bytes of an object to scan at
 	// once. Larger objects will be split up into "oblets" of at
 	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
@@ -41,14 +37,26 @@ const (
 	// a syscall, so its overhead is nontrivial). Higher values
 	// make the system less responsive to incoming work.
 	drainCheckThreshold = 100000
+
+	// pagesPerSpanRoot indicates how many pages to scan from a span root
+	// at a time. Used by special root marking.
+	//
+	// Higher values improve throughput by increasing locality, but
+	// increase the minimum latency of a marking operation.
+	//
+	// Must be a multiple of the pageInUse bitmap element size and
+	// must also evenly divide pagesPerArena.
+	pagesPerSpanRoot = 512
+
+	// go115NewMarkrootSpans is a feature flag that indicates whether
+	// to use the new bitmap-based markrootSpans implementation.
+	go115NewMarkrootSpans = true
 )
 
 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
 // some miscellany) and initializes scanning-related state.
 //
 // The world must be stopped.
-//
-//go:nowritebarrier
 func gcMarkRootPrepare() {
 	work.nFlushCacheRoots = 0
 
@@ -79,13 +87,24 @@ func gcMarkRootPrepare() {
 	//
 	// We depend on addfinalizer to mark objects that get
 	// finalizers after root marking.
-	//
-	// We're only interested in scanning the in-use spans,
-	// which will all be swept at this point. More spans
-	// may be added to this list during concurrent GC, but
-	// we only care about spans that were allocated before
-	// this mark phase.
-	work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+	if go115NewMarkrootSpans {
+		// We're going to scan the whole heap (that was available at the time the
+		// mark phase started, i.e. markArenas) for in-use spans which have specials.
+		//
+		// Break up the work into arenas, and further into chunks.
+		//
+		// Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+		// is append-only.
+		mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+		work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
+	} else {
+		// We're only interested in scanning the in-use spans,
+		// which will all be swept at this point. More spans
+		// may be added to this list during concurrent GC, but
+		// we only care about spans that were allocated before
+		// this mark phase.
+		work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+	}
 
 	// Scan stacks.
 	//
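The divisibility checks added to mallocinit exist so that span-root shards tile arenas exactly: each arena contributes pagesPerArena/pagesPerSpanRoot shards, and the gcMarkRootPrepare hunk above sizes nSpanRoots accordingly. A minimal standalone sketch of the shard-to-(arena, page) decomposition, assuming a 64 MB arena with 8 KB pages; the function name and plain int indexes are illustrative, not the runtime's:

package main

import "fmt"

// Constants mirror a 64 MB arena with 8 KB pages (an assumption of
// this sketch; the real values are platform-dependent).
const (
	pagesPerArena    = 8192
	pagesPerSpanRoot = 512
)

// shardToArenaPage decomposes a span-root shard index the same way
// markrootSpans does: shards are numbered across all arenas, so a
// shard maps to one arena plus a pagesPerSpanRoot-sized page window.
func shardToArenaPage(shard int) (arena, page int) {
	shardsPerArena := pagesPerArena / pagesPerSpanRoot // 16 here
	arena = shard / shardsPerArena
	page = shard * pagesPerSpanRoot % pagesPerArena
	return
}

func main() {
	for _, shard := range []int{0, 1, 15, 16, 17} {
		a, p := shardToArenaPage(shard)
		fmt.Printf("shard %2d -> arena %d, pages [%d, %d)\n",
			shard, a, p, p+pagesPerSpanRoot)
	}
}

If pagesPerArena were not a multiple of pagesPerSpanRoot, the last window in each arena would spill into the next arena, which is exactly what the mallocinit throws guard against.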
@@ -293,10 +312,96 @@ func markrootFreeGStacks() {
 	unlock(&sched.gFree.lock)
 }
 
-// markrootSpans marks roots for one shard of work.spans.
+// markrootSpans marks roots for one shard of markArenas.
 //
 //go:nowritebarrier
 func markrootSpans(gcw *gcWork, shard int) {
+	if !go115NewMarkrootSpans {
+		oldMarkrootSpans(gcw, shard)
+		return
+	}
+	// Objects with finalizers have two GC-related invariants:
+	//
+	// 1) Everything reachable from the object must be marked.
+	// This ensures that when we pass the object to its finalizer,
+	// everything the finalizer can reach will be retained.
+	//
+	// 2) Finalizer specials (which are not in the garbage
+	// collected heap) are roots. In practice, this means the fn
+	// field must be scanned.
+	sg := mheap_.sweepgen
+
+	// Find the arena and page index into that arena for this shard.
+	ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
+
+	// Construct slice of bitmap which we'll iterate over.
+	specialsbits := ha.pageSpecials[arenaPage/8:]
+	specialsbits = specialsbits[:pagesPerSpanRoot/8]
+	for i := range specialsbits {
+		// Find set bits, which correspond to spans with specials.
+		specials := atomic.Load8(&specialsbits[i])
+		if specials == 0 {
+			continue
+		}
+		for j := uint(0); j < 8; j++ {
+			if specials&(1<<j) == 0 {
+				continue
+			}
+			// Find the span for this bit.
+			//
+			// This value is guaranteed to be non-nil because having
+			// specials implies that the span is in-use, and since we're
+			// currently marking we can be sure that we don't have to worry
+			// about the span being freed and re-used.
+			s := ha.spans[arenaPage+uint(i)*8+j]
+
+			// The state must be mSpanInUse if the specials bit is set, so
+			// sanity check that.
+			if state := s.state.get(); state != mSpanInUse {
+				print("s.state = ", state, "\n")
+				throw("non in-use span found with specials bit set")
+			}
+			// Check that this span was swept (it may be cached or uncached).
+			if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
+				// sweepgen was updated (+2) during non-checkmark GC pass
+				print("sweep ", s.sweepgen, " ", sg, "\n")
+				throw("gc: unswept span")
+			}
+
+			// Lock the specials to prevent a special from being
+			// removed from the list while we're traversing it.
+			lock(&s.speciallock)
+			for sp := s.specials; sp != nil; sp = sp.next {
+				if sp.kind != _KindSpecialFinalizer {
+					continue
+				}
+				// don't mark finalized object, but scan it so we
+				// retain everything it points to.
+				spf := (*specialfinalizer)(unsafe.Pointer(sp))
+				// A finalizer can be set for an inner byte of an object, find object beginning.
+				p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+
+				// Mark everything that can be reached from
+				// the object (but *not* the object itself or
+				// we'll never collect it).
+				scanobject(p, gcw)
+
+				// The special itself is a root.
+				scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
+			}
+			unlock(&s.speciallock)
+		}
+	}
+}
+
+// oldMarkrootSpans marks roots for one shard of work.spans.
+//
+// For go115NewMarkrootSpans = false.
+//
+//go:nowritebarrier
+func oldMarkrootSpans(gcw *gcWork, shard int) {
 	// Objects with finalizers have two GC-related invariants:
 	//
 	// 1) Everything reachable from the object must be marked.
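The heart of the new markrootSpans is a find-set-bits walk: load one byte of the pageSpecials bitmap (covering eight pages), skip it if zero, otherwise test each bit. The same pattern can be exercised outside the runtime; this is a sketch only, with plain loads standing in for runtime/internal/atomic.Load8 and forEachSetBit as an invented name:

package main

import "fmt"

// forEachSetBit walks a byte bitmap and invokes fn with the index of
// every set bit, mirroring the specialsbits loop in markrootSpans.
func forEachSetBit(bits []uint8, fn func(idx uint)) {
	for i := range bits {
		b := bits[i] // markrootSpans uses an atomic load here
		if b == 0 {
			continue // skip eight pages at a time when there is no work
		}
		for j := uint(0); j < 8; j++ {
			if b&(1<<j) != 0 {
				fn(uint(i)*8 + j)
			}
		}
	}
}

func main() {
	// Bits 3, 8, and 14 set: pages 3, 8, and 14 carry specials.
	bitmap := []uint8{0x08, 0x41}
	forEachSetBit(bitmap, func(page uint) {
		fmt.Println("span at page", page, "has specials")
	})
}

Skipping whole zero bytes is what keeps the scan cheap in the common case, since most spans have no finalizers or other specials.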
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index c075f66b12..c63db24b33 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -246,6 +246,7 @@ func (s *mspan) sweep(preserve bool) bool {
 	// 2. A tiny object can have several finalizers setup for different offsets.
 	//    If such object is not marked, we need to queue all finalizers at once.
 	// Both 1 and 2 are possible at the same time.
+	hadSpecials := s.specials != nil
 	specialp := &s.specials
 	special := *specialp
 	for special != nil {
@@ -290,6 +291,9 @@ func (s *mspan) sweep(preserve bool) bool {
 			special = *specialp
 		}
 	}
+	if go115NewMarkrootSpans && hadSpecials && s.specials == nil {
+		spanHasNoSpecials(s)
+	}
 
 	if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
 		// Find all newly freed objects. This doesn't have to
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 9bb33b2000..9448748603 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -27,6 +27,23 @@ const (
 	// maxPhysHugePageSize sets an upper-bound on the maximum huge page size
 	// that the runtime supports.
 	maxPhysHugePageSize = pallocChunkBytes
+
+	// pagesPerReclaimerChunk indicates how many pages to scan from the
+	// pageInUse bitmap at a time. Used by the page reclaimer.
+	//
+	// Higher values reduce contention on scanning indexes (such as
+	// h.reclaimIndex), but increase the minimum latency of the
+	// operation.
+	//
+	// The time required to scan this many pages can vary a lot depending
+	// on how many spans are actually freed. Experimentally, it can
+	// scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+	// free spans at ~32 MB/ms. Using 512 pages bounds this at
+	// roughly 100µs.
+	//
+	// Must be a multiple of the pageInUse bitmap element size and
+	// must also evenly divide pagesPerArena.
+	pagesPerReclaimerChunk = 512
 )
 
 // Main malloc heap.
@@ -180,13 +197,19 @@ type mheap struct {
 	// simply blocking GC (by disabling preemption).
 	sweepArenas []arenaIdx
 
+	// markArenas is a snapshot of allArenas taken at the beginning
+	// of the mark cycle. Because allArenas is append-only, neither
+	// this slice nor its contents will change during the mark, so
+	// it can be read safely.
+	markArenas []arenaIdx
+
 	// curArena is the arena that the heap is currently growing
 	// into. This should always be physPageSize-aligned.
 	curArena struct {
 		base, end uintptr
 	}
 
-	_ uint32 // ensure 64-bit alignment of central
+	// _ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the mcentrals are
@@ -256,6 +279,16 @@ type heapArena struct {
 	// operations.
 	pageMarks [pagesPerArena / 8]uint8
 
+	// pageSpecials is a bitmap that indicates which spans have
+	// specials (finalizers or other). Like pageInUse, only the bit
+	// corresponding to the first page in each span is used.
+	//
+	// Writes are done atomically whenever a special is added to
+	// a span and whenever the last special is removed from a span.
+	// Reads are done atomically to find spans containing specials
+	// during marking.
+	pageSpecials [pagesPerArena / 8]uint8
+
 	// zeroedBase marks the first byte of the first page in this
 	// arena which hasn't been used yet and is therefore already
 	// zero. zeroedBase is relative to the arena base.
@@ -706,23 +739,10 @@ func (h *mheap) init() {
 //
 // h must NOT be locked.
 func (h *mheap) reclaim(npage uintptr) {
-	// This scans pagesPerChunk at a time. Higher values reduce
-	// contention on h.reclaimPos, but increase the minimum
-	// latency of performing a reclaim.
-	//
-	// Must be a multiple of the pageInUse bitmap element size.
-	//
-	// The time required by this can vary a lot depending on how
-	// many spans are actually freed. Experimentally, it can scan
-	// for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
-	// free spans at ~32 MB/ms. Using 512 pages bounds this at
-	// roughly 100µs.
-	//
 	// TODO(austin): Half of the time spent freeing spans is in
 	// locking/unlocking the heap (even with low contention). We
 	// could make the slow path here several times faster by
 	// batching heap frees.
-	const pagesPerChunk = 512
 
 	// Bail early if there's no more reclaim work.
 	if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
@@ -755,7 +775,7 @@ func (h *mheap) reclaim(npage uintptr) {
 		}
 
 		// Claim a chunk of work.
-		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+		idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
 		if idx/pagesPerArena >= uintptr(len(arenas)) {
 			// Page reclaiming is done.
 			atomic.Store64(&h.reclaimIndex, 1<<63)
@@ -769,7 +789,7 @@ func (h *mheap) reclaim(npage uintptr) {
 		}
 
 		// Scan this chunk.
-		nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+		nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
 		if nfound <= npage {
 			npage -= nfound
 		} else {
@@ -1593,6 +1613,22 @@ type special struct {
 	kind   byte     // kind of special
 }
 
+// spanHasSpecials marks a span as having specials in the arena bitmap.
+func spanHasSpecials(s *mspan) {
+	arenaPage := (s.base() / pageSize) % pagesPerArena
+	ai := arenaIndex(s.base())
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
+}
+
+// spanHasNoSpecials marks a span as having no specials in the arena bitmap.
+func spanHasNoSpecials(s *mspan) {
+	arenaPage := (s.base() / pageSize) % pagesPerArena
+	ai := arenaIndex(s.base())
+	ha := mheap_.arenas[ai.l1()][ai.l2()]
+	atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
+}
+
 // Adds the special record s to the list of special records for
 // the object p. All fields of s should be filled in except for
 // offset & next, which this routine will fill in.
@@ -1638,6 +1674,9 @@ func addspecial(p unsafe.Pointer, s *special) bool {
 	s.offset = uint16(offset)
 	s.next = *t
 	*t = s
+	if go115NewMarkrootSpans {
+		spanHasSpecials(span)
+	}
 	unlock(&span.speciallock)
 	releasem(mp)
 
@@ -1661,6 +1700,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
 
 	offset := uintptr(p) - span.base()
 
+	var result *special
 	lock(&span.speciallock)
 	t := &span.specials
 	for {
@@ -1672,15 +1712,17 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
 		// "interior" specials (p must be exactly equal to s->offset).
 		if offset == uintptr(s.offset) && kind == s.kind {
 			*t = s.next
-			unlock(&span.speciallock)
-			releasem(mp)
-			return s
+			result = s
+			break
 		}
 		t = &s.next
 	}
+	if go115NewMarkrootSpans && span.specials == nil {
+		spanHasNoSpecials(span)
+	}
 	unlock(&span.speciallock)
 	releasem(mp)
-	return nil
+	return result
 }
 
 // The described object has a finalizer set for it.
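spanHasSpecials and spanHasNoSpecials flip a single bit with Or8 and And8 from runtime/internal/atomic; the public sync/atomic package has no 8-bit operations, so a user-level approximation has to emulate them, here with uint32 words and compare-and-swap loops. Everything below (names, word-sized layout) is an assumption of the sketch, not the runtime's implementation:

package main

import (
	"fmt"
	"sync/atomic"
)

// setBit atomically sets bit i of the bitmap, emulating atomic.Or8.
func setBit(bitmap []uint32, i uint) {
	w, mask := i/32, uint32(1)<<(i%32)
	for {
		old := atomic.LoadUint32(&bitmap[w])
		if atomic.CompareAndSwapUint32(&bitmap[w], old, old|mask) {
			return
		}
	}
}

// clearBit atomically clears bit i of the bitmap, emulating atomic.And8
// with the inverted mask.
func clearBit(bitmap []uint32, i uint) {
	w, mask := i/32, uint32(1)<<(i%32)
	for {
		old := atomic.LoadUint32(&bitmap[w])
		if atomic.CompareAndSwapUint32(&bitmap[w], old, old&^mask) {
			return
		}
	}
}

func main() {
	bitmap := make([]uint32, 2) // room for 64 pages
	setBit(bitmap, 37)          // addspecial: span gained its first special
	fmt.Printf("%032b\n", bitmap[1])
	clearBit(bitmap, 37) // removespecial: last special removed
	fmt.Printf("%032b\n", bitmap[1])
}

Because the bit is only set when a special is added and only cleared when the last one goes away (under the span's speciallock, or by the sweeper that owns the span), concurrent markers reading the bitmap atomically see a conservative, race-free view of which spans need their specials scanned.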
