Diffstat (limited to 'src')
-rw-r--r--  src/runtime/malloc.go     8
-rw-r--r--  src/runtime/mgcmark.go  133
-rw-r--r--  src/runtime/mgcsweep.go   4
-rw-r--r--  src/runtime/mheap.go     82
4 files changed, 193 insertions(+), 34 deletions(-)
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index e1ec5e6496..29e0071b3c 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -465,6 +465,14 @@ func mallocinit() {
physHugePageShift++
}
}
+ if pagesPerArena%pagesPerSpanRoot != 0 {
+ print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerSpanRoot (", pagesPerSpanRoot, ")\n")
+ throw("bad pagesPerSpanRoot")
+ }
+ if pagesPerArena%pagesPerReclaimerChunk != 0 {
+ print("pagesPerArena (", pagesPerArena, ") is not divisible by pagesPerReclaimerChunk (", pagesPerReclaimerChunk, ")\n")
+ throw("bad pagesPerReclaimerChunk")
+ }
// Initialize the heap.
mheap_.init()
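These checks guard assumptions made elsewhere in this change: the new span-root marking and the page reclaimer both walk arenas in fixed-size chunks, so every arena must split into a whole number of them. A minimal sketch of the arithmetic, assuming the usual 64-bit configuration of 64 MiB arenas and 8 KiB runtime pages (both values are assumptions for illustration, not taken from this diff):

package main

import "fmt"

// Assumed values for a typical 64-bit platform; the real constants live
// in the runtime (heapArenaBytes, pageSize) and can differ per platform.
const (
	heapArenaBytes = 64 << 20                  // 64 MiB per heap arena (assumption)
	pageSize       = 8 << 10                   // 8 KiB runtime page (assumption)
	pagesPerArena  = heapArenaBytes / pageSize // 8192

	pagesPerSpanRoot       = 512
	pagesPerReclaimerChunk = 512
)

func main() {
	// Both divisions must be exact, otherwise a span-root shard or a
	// reclaimer chunk would straddle an arena boundary.
	fmt.Println(pagesPerArena%pagesPerSpanRoot == 0)       // true
	fmt.Println(pagesPerArena / pagesPerSpanRoot)          // 16 span roots per arena
	fmt.Println(pagesPerArena%pagesPerReclaimerChunk == 0) // true
	fmt.Println(pagesPerArena / pagesPerReclaimerChunk)    // 16 reclaimer chunks per arena
}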
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 301d8020f1..ea73ccc1b1 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -21,10 +21,6 @@ const (
// BSS root.
rootBlockBytes = 256 << 10
- // rootBlockSpans is the number of spans to scan per span
- // root.
- rootBlockSpans = 8 * 1024 // 64MB worth of spans
-
// maxObletBytes is the maximum bytes of an object to scan at
// once. Larger objects will be split up into "oblets" of at
// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
@@ -41,14 +37,26 @@ const (
// a syscall, so its overhead is nontrivial). Higher values
// make the system less responsive to incoming work.
drainCheckThreshold = 100000
+
+ // pagesPerSpanRoot indicates how many pages to scan from a span root
+ // at a time. Used by special root marking.
+ //
+ // Higher values improve throughput by increasing locality, but
+ // increase the minimum latency of a marking operation.
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
+ pagesPerSpanRoot = 512
+
+ // go115NewMarkrootSpans is a feature flag that indicates whether
+ // to use the new bitmap-based markrootSpans implementation.
+ go115NewMarkrootSpans = true
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The world must be stopped.
-//
-//go:nowritebarrier
func gcMarkRootPrepare() {
work.nFlushCacheRoots = 0
@@ -79,13 +87,24 @@ func gcMarkRootPrepare() {
//
// We depend on addfinalizer to mark objects that get
// finalizers after root marking.
- //
- // We're only interested in scanning the in-use spans,
- // which will all be swept at this point. More spans
- // may be added to this list during concurrent GC, but
- // we only care about spans that were allocated before
- // this mark phase.
- work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ if go115NewMarkrootSpans {
+ // We're going to scan the whole heap (that was available at the time the
+ // mark phase started, i.e. markArenas) for in-use spans which have specials.
+ //
+ // Break up the work into arenas, and further into chunks.
+ //
+ // Snapshot allArenas as markArenas. This snapshot is safe because allArenas
+ // is append-only.
+ mheap_.markArenas = mheap_.allArenas[:len(mheap_.allArenas):len(mheap_.allArenas)]
+ work.nSpanRoots = len(mheap_.markArenas) * (pagesPerArena / pagesPerSpanRoot)
+ } else {
+ // We're only interested in scanning the in-use spans,
+ // which will all be swept at this point. More spans
+ // may be added to this list during concurrent GC, but
+ // we only care about spans that were allocated before
+ // this mark phase.
+ work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
+ }
// Scan stacks.
//
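Each of those span-root shards later has to be mapped back to a concrete arena and a 512-page window within it. A minimal sketch of that mapping, mirroring the index arithmetic at the top of the new markrootSpans below (the helper name and layout constants are assumptions for illustration):

package main

import "fmt"

// Assumed layout constants (64 MiB arenas, 8 KiB pages).
const (
	pagesPerArena    = (64 << 20) / (8 << 10) // 8192
	pagesPerSpanRoot = 512
)

// shardToArenaPage maps a span-root shard index to an arena index
// (into the markArenas snapshot) and the first page of its window.
func shardToArenaPage(shard int) (arena int, firstPage uint) {
	chunksPerArena := pagesPerArena / pagesPerSpanRoot // 16
	return shard / chunksPerArena, uint(shard * pagesPerSpanRoot % pagesPerArena)
}

func main() {
	// Shard 35 covers markArenas[2], pages 1536..2047 of that arena.
	arena, firstPage := shardToArenaPage(35)
	fmt.Println(arena, firstPage) // 2 1536
}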
@@ -293,10 +312,96 @@ func markrootFreeGStacks() {
unlock(&sched.gFree.lock)
}
-// markrootSpans marks roots for one shard of work.spans.
+// markrootSpans marks roots for one shard of markArenas.
//
//go:nowritebarrier
func markrootSpans(gcw *gcWork, shard int) {
+ if !go115NewMarkrootSpans {
+ oldMarkrootSpans(gcw, shard)
+ return
+ }
+ // Objects with finalizers have two GC-related invariants:
+ //
+ // 1) Everything reachable from the object must be marked.
+ // This ensures that when we pass the object to its finalizer,
+ // everything the finalizer can reach will be retained.
+ //
+ // 2) Finalizer specials (which are not in the garbage
+ // collected heap) are roots. In practice, this means the fn
+ // field must be scanned.
+ sg := mheap_.sweepgen
+
+ // Find the arena and page index into that arena for this shard.
+ ai := mheap_.markArenas[shard/(pagesPerArena/pagesPerSpanRoot)]
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ arenaPage := uint(uintptr(shard) * pagesPerSpanRoot % pagesPerArena)
+
+ // Construct slice of bitmap which we'll iterate over.
+ specialsbits := ha.pageSpecials[arenaPage/8:]
+ specialsbits = specialsbits[:pagesPerSpanRoot/8]
+ for i := range specialsbits {
+ // Find set bits, which correspond to spans with specials.
+ specials := atomic.Load8(&specialsbits[i])
+ if specials == 0 {
+ continue
+ }
+ for j := uint(0); j < 8; j++ {
+ if specials&(1<<j) == 0 {
+ continue
+ }
+ // Find the span for this bit.
+ //
+ // This value is guaranteed to be non-nil because having
+ // specials implies that the span is in-use, and since we're
+ // currently marking we can be sure that we don't have to worry
+ // about the span being freed and re-used.
+ s := ha.spans[arenaPage+uint(i)*8+j]
+
+ // The state must be mSpanInUse if the specials bit is set, so
+ // sanity check that.
+ if state := s.state.get(); state != mSpanInUse {
+ print("s.state = ", state, "\n")
+ throw("non in-use span found with specials bit set")
+ }
+ // Check that this span was swept (it may be cached or uncached).
+ if !useCheckmark && !(s.sweepgen == sg || s.sweepgen == sg+3) {
+ // sweepgen was updated (+2) during non-checkmark GC pass
+ print("sweep ", s.sweepgen, " ", sg, "\n")
+ throw("gc: unswept span")
+ }
+
+ // Lock the specials to prevent a special from being
+ // removed from the list while we're traversing it.
+ lock(&s.speciallock)
+ for sp := s.specials; sp != nil; sp = sp.next {
+ if sp.kind != _KindSpecialFinalizer {
+ continue
+ }
+ // don't mark finalized object, but scan it so we
+ // retain everything it points to.
+ spf := (*specialfinalizer)(unsafe.Pointer(sp))
+ // A finalizer can be set for an inner byte of an object, find object beginning.
+ p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+
+ // Mark everything that can be reached from
+ // the object (but *not* the object itself or
+ // we'll never collect it).
+ scanobject(p, gcw)
+
+ // The special itself is a root.
+ scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
+ }
+ unlock(&s.speciallock)
+ }
+ }
+}
+
+// oldMarkrootSpans marks roots for one shard of work.spans.
+//
+// For go115NewMarkrootSpans = false.
+//
+//go:nowritebarrier
+func oldMarkrootSpans(gcw *gcWork, shard int) {
// Objects with finalizers have two GC-related invariants:
//
// 1) Everything reachable from the object must be marked.
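The inner loops of the new markrootSpans above reduce to a standard byte-bitmap scan: load one byte of pageSpecials, skip it if zero, otherwise visit each set bit. A self-contained sketch of that pattern (the function and callback are hypothetical, not runtime API); note the runtime loads each byte with atomic.Load8, which this standalone sketch does not:

package main

import "fmt"

// forEachSetBit visits the page index of every set bit in a byte bitmap,
// mirroring the specialsbits loop in markrootSpans: whole zero bytes are
// skipped cheaply, and per-bit work is only paid for pages that actually
// have specials.
func forEachSetBit(bitmap []uint8, visit func(page uint)) {
	for i, b := range bitmap {
		if b == 0 {
			continue // no spans with specials in these 8 pages
		}
		for j := uint(0); j < 8; j++ {
			if b&(1<<j) != 0 {
				visit(uint(i)*8 + j)
			}
		}
	}
}

func main() {
	// Pages 1, 3, and 10 of this 16-page window have the specials bit set.
	bitmap := []uint8{0b0000_1010, 0b0000_0100}
	forEachSetBit(bitmap, func(page uint) {
		fmt.Println("scan specials on page", page)
	})
}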
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index c075f66b12..c63db24b33 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -246,6 +246,7 @@ func (s *mspan) sweep(preserve bool) bool {
// 2. A tiny object can have several finalizers setup for different offsets.
// If such object is not marked, we need to queue all finalizers at once.
// Both 1 and 2 are possible at the same time.
+ hadSpecials := s.specials != nil
specialp := &s.specials
special := *specialp
for special != nil {
@@ -290,6 +291,9 @@ func (s *mspan) sweep(preserve bool) bool {
special = *specialp
}
}
+ if go115NewMarkrootSpans && hadSpecials && s.specials == nil {
+ spanHasNoSpecials(s)
+ }
if debug.allocfreetrace != 0 || debug.clobberfree != 0 || raceenabled || msanenabled {
// Find all newly freed objects. This doesn't have to
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 9bb33b2000..9448748603 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -27,6 +27,23 @@ const (
// maxPhysHugePageSize sets an upper-bound on the maximum huge page size
// that the runtime supports.
maxPhysHugePageSize = pallocChunkBytes
+
+ // pagesPerReclaimerChunk indicates how many pages to scan from the
+ // pageInUse bitmap at a time. Used by the page reclaimer.
+ //
+ // Higher values reduce contention on scanning indexes (such as
+ // h.reclaimIndex), but increase the minimum latency of the
+ // operation.
+ //
+ // The time required to scan this many pages can vary a lot depending
+ // on how many spans are actually freed. Experimentally, it can
+ // scan for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
+ // free spans at ~32 MB/ms. Using 512 pages bounds this at
+ // roughly 100µs.
+ //
+ // Must be a multiple of the pageInUse bitmap element size and
+ // must also evenly divide pagesPerArena.
+ pagesPerReclaimerChunk = 512
)
// Main malloc heap.
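The "roughly 100µs" bound above follows directly from the quoted free rate: one 512-page chunk covers about 4 MiB of pages, and freeing at ~32 MB/ms takes about 0.125 ms. A back-of-envelope sketch, assuming the usual 8 KiB runtime page size (an assumption, not taken from this diff):

package main

import "fmt"

func main() {
	const (
		pagesPerReclaimerChunk = 512
		pageSize               = 8 << 10  // bytes per runtime page (assumed)
		freeBytesPerMs         = 32 << 20 // ~32 MB of spans freed per ms, per the comment above
	)
	chunkBytes := pagesPerReclaimerChunk * pageSize // 4 MiB per claimed chunk
	fmt.Printf("worst-case free time per chunk ≈ %.0f µs\n",
		float64(chunkBytes)/float64(freeBytesPerMs)*1000) // ≈ 125 µs, i.e. roughly 100µs
}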
@@ -180,13 +197,19 @@ type mheap struct {
// simply blocking GC (by disabling preemption).
sweepArenas []arenaIdx
+ // markArenas is a snapshot of allArenas taken at the beginning
+ // of the mark cycle. Because allArenas is append-only, neither
+ // this slice nor its contents will change during the mark, so
+ // it can be read safely.
+ markArenas []arenaIdx
+
// curArena is the arena that the heap is currently growing
// into. This should always be physPageSize-aligned.
curArena struct {
base, end uintptr
}
- _ uint32 // ensure 64-bit alignment of central
+ // _ uint32 // ensure 64-bit alignment of central
// central free lists for small size classes.
// the padding makes sure that the mcentrals are
@@ -256,6 +279,16 @@ type heapArena struct {
// operations.
pageMarks [pagesPerArena / 8]uint8
+ // pageSpecials is a bitmap that indicates which spans have
+ // specials (finalizers or other). Like pageInUse, only the bit
+ // corresponding to the first page in each span is used.
+ //
+ // Writes are done atomically whenever a special is added to
+ // a span and whenever the last special is removed from a span.
+ // Reads are done atomically to find spans containing specials
+ // during marking.
+ pageSpecials [pagesPerArena / 8]uint8
+
// zeroedBase marks the first byte of the first page in this
// arena which hasn't been used yet and is therefore already
// zero. zeroedBase is relative to the arena base.
@@ -706,23 +739,10 @@ func (h *mheap) init() {
//
// h must NOT be locked.
func (h *mheap) reclaim(npage uintptr) {
- // This scans pagesPerChunk at a time. Higher values reduce
- // contention on h.reclaimPos, but increase the minimum
- // latency of performing a reclaim.
- //
- // Must be a multiple of the pageInUse bitmap element size.
- //
- // The time required by this can vary a lot depending on how
- // many spans are actually freed. Experimentally, it can scan
- // for pages at ~300 GB/ms on a 2.6GHz Core i7, but can only
- // free spans at ~32 MB/ms. Using 512 pages bounds this at
- // roughly 100µs.
- //
// TODO(austin): Half of the time spent freeing spans is in
// locking/unlocking the heap (even with low contention). We
// could make the slow path here several times faster by
// batching heap frees.
- const pagesPerChunk = 512
// Bail early if there's no more reclaim work.
if atomic.Load64(&h.reclaimIndex) >= 1<<63 {
@@ -755,7 +775,7 @@ func (h *mheap) reclaim(npage uintptr) {
}
// Claim a chunk of work.
- idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerChunk) - pagesPerChunk)
+ idx := uintptr(atomic.Xadd64(&h.reclaimIndex, pagesPerReclaimerChunk) - pagesPerReclaimerChunk)
if idx/pagesPerArena >= uintptr(len(arenas)) {
// Page reclaiming is done.
atomic.Store64(&h.reclaimIndex, 1<<63)
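The renamed constant feeds the same lock-free work claiming as before: each reclaimer atomically advances a shared index by one chunk and owns exactly the pages it stepped over. A minimal sketch of that claim pattern outside the runtime, using sync/atomic in place of runtime/internal/atomic (names hypothetical):

package main

import (
	"fmt"
	"sync/atomic"
)

const chunk = 512 // pages claimed per step, like pagesPerReclaimerChunk

var reclaimIndex uint64 // next page index to hand out

// claimChunk returns the first page of a chunk that now belongs
// exclusively to the caller; concurrent callers receive disjoint ranges.
func claimChunk() uint64 {
	return atomic.AddUint64(&reclaimIndex, chunk) - chunk
}

func main() {
	fmt.Println(claimChunk()) // 0
	fmt.Println(claimChunk()) // 512
}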
@@ -769,7 +789,7 @@ func (h *mheap) reclaim(npage uintptr) {
}
// Scan this chunk.
- nfound := h.reclaimChunk(arenas, idx, pagesPerChunk)
+ nfound := h.reclaimChunk(arenas, idx, pagesPerReclaimerChunk)
if nfound <= npage {
npage -= nfound
} else {
@@ -1593,6 +1613,22 @@ type special struct {
kind byte // kind of special
}
+// spanHasSpecials marks a span as having specials in the arena bitmap.
+func spanHasSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.Or8(&ha.pageSpecials[arenaPage/8], uint8(1)<<(arenaPage%8))
+}
+
+// spanHasNoSpecials marks a span as having no specials in the arena bitmap.
+func spanHasNoSpecials(s *mspan) {
+ arenaPage := (s.base() / pageSize) % pagesPerArena
+ ai := arenaIndex(s.base())
+ ha := mheap_.arenas[ai.l1()][ai.l2()]
+ atomic.And8(&ha.pageSpecials[arenaPage/8], ^(uint8(1) << (arenaPage % 8)))
+}
+
// Adds the special record s to the list of special records for
// the object p. All fields of s should be filled in except for
// offset & next, which this routine will fill in.
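Both helpers above locate a span's bit the same way: the span's first page number within its arena picks a byte (arenaPage/8) and a bit (arenaPage%8) of pageSpecials. A small worked example under the same assumed constants as earlier (64 MiB arenas, 8 KiB pages); the function name is hypothetical:

package main

import "fmt"

// specialsBitPos returns the byte index and bit within heapArena.pageSpecials
// for a span whose base address is spanBase, mirroring spanHasSpecials and
// spanHasNoSpecials. Constants here are assumed values for illustration.
func specialsBitPos(spanBase uintptr) (byteIdx, bit uintptr) {
	const (
		pageSize      = 8 << 10
		pagesPerArena = (64 << 20) / pageSize // 8192
	)
	arenaPage := (spanBase / pageSize) % pagesPerArena
	return arenaPage / 8, arenaPage % 8
}

func main() {
	// A span starting 3 pages into its arena lands in byte 0, bit 3,
	// which is exactly the bit Or8 sets and And8 clears above.
	fmt.Println(specialsBitPos(3 * (8 << 10))) // 0 3
}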
@@ -1638,6 +1674,9 @@ func addspecial(p unsafe.Pointer, s *special) bool {
s.offset = uint16(offset)
s.next = *t
*t = s
+ if go115NewMarkrootSpans {
+ spanHasSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
@@ -1661,6 +1700,7 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
offset := uintptr(p) - span.base()
+ var result *special
lock(&span.speciallock)
t := &span.specials
for {
@@ -1672,15 +1712,17 @@ func removespecial(p unsafe.Pointer, kind uint8) *special {
// "interior" specials (p must be exactly equal to s->offset).
if offset == uintptr(s.offset) && kind == s.kind {
*t = s.next
- unlock(&span.speciallock)
- releasem(mp)
- return s
+ result = s
+ break
}
t = &s.next
}
+ if go115NewMarkrootSpans && span.specials == nil {
+ spanHasNoSpecials(span)
+ }
unlock(&span.speciallock)
releasem(mp)
- return nil
+ return result
}
// The described object has a finalizer set for it.