diff options
Diffstat (limited to 'src/runtime/malloc.go')
| -rw-r--r-- | src/runtime/malloc.go | 329 |
1 files changed, 325 insertions, 4 deletions
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index fc4f21b532..13f5fc3081 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1080,7 +1080,8 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger // // We might consider turning these on by default; many of them previously were. // They account for a few % of mallocgc's cost though, which does matter somewhat -// at scale. +// at scale. (When testing changes to malloc, consider enabling this, and also +// some function-local 'doubleCheck' consts such as in mbitmap.go currently.) const doubleCheckMalloc = false // sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized @@ -1089,6 +1090,12 @@ const doubleCheckMalloc = false // properly on plan9, so size-specialized malloc is also disabled on plan9. const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled +// runtimeFreegcEnabled is the set of conditions where we enable the runtime.freegc +// implementation and the corresponding allocation-related changes: the experiment must be +// enabled, and none of the memory sanitizers should be enabled. We allow the race detector, +// in contrast to sizeSpecializedMallocEnabled. +const runtimeFreegcEnabled = goexperiment.RuntimeFreegc && !asanenabled && !msanenabled && !valgrindenabled + // Allocate an object of size bytes. // Small objects are allocated from the per-P cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. @@ -1150,7 +1157,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { size += asanRZ } - // Assist the GC if needed. + // Assist the GC if needed. (On the reuse path, we currently compensate for this; + // changes here might require changes there.) if gcBlackenEnabled != 0 { deductAssistCredit(size) } @@ -1413,6 +1421,16 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe size = uintptr(gc.SizeClassToSize[sizeclass]) spc := makeSpanClass(sizeclass, true) span := c.alloc[spc] + + // First, check for a reusable object. + if runtimeFreegcEnabled && c.hasReusableNoscan(spc) { + // We have a reusable object, use it. + x := mallocgcSmallNoscanReuse(c, span, spc, size, needzero) + mp.mallocing = 0 + releasem(mp) + return x, size + } + v := nextFreeFast(span) if v == 0 { v, span, checkGCTrigger = c.nextFree(spc) @@ -1472,6 +1490,55 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe return x, size } +// mallocgcSmallNoscanReuse returns a previously freed noscan object after preparing it for reuse. +// It must only be called if hasReusableNoscan returned true. +func mallocgcSmallNoscanReuse(c *mcache, span *mspan, spc spanClass, size uintptr, needzero bool) unsafe.Pointer { + // TODO(thepudds): could nextFreeFast, nextFree and nextReusable return unsafe.Pointer? + // Maybe doesn't matter. gclinkptr might be for historical reasons. + v, span := c.nextReusableNoScan(span, spc) + x := unsafe.Pointer(v) + + // Compensate for the GC assist credit deducted in mallocgc (before calling us and + // after we return) because this is not a newly allocated object. We use the full slot + // size (elemsize) here because that's what mallocgc deducts overall. Note we only + // adjust this when gcBlackenEnabled is true, which follows mallocgc behavior. + // TODO(thepudds): a follow-up CL adds a more specific test of our assist credit + // handling, including for validating internal fragmentation handling. + if gcBlackenEnabled != 0 { + addAssistCredit(size) + } + + // This is a previously used object, so only check needzero (and not span.needzero) + // for clearing. + if needzero { + memclrNoHeapPointers(x, size) + } + + // See publicationBarrier comment in mallocgcSmallNoscan. + publicationBarrier() + + // Finish and return. Note that we do not update span.freeIndexForScan, profiling info, + // nor do we check gcTrigger. + // TODO(thepudds): the current approach is viable for a GOEXPERIMENT, but + // means we do not profile reused heap objects. Ultimately, we will need a better + // approach for profiling, or at least ensure we are not introducing bias in the + // profiled allocations. + // TODO(thepudds): related, we probably want to adjust how allocs and frees are counted + // in the existing stats. Currently, reused objects are not counted as allocs nor + // frees, but instead roughly appear as if the original heap object lived on. We + // probably will also want some additional runtime/metrics, and generally think about + // user-facing observability & diagnostics, though all this likely can wait for an + // official proposal. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } + return x +} + func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) { // Set mp.mallocing to keep from being preempted by GC. mp := acquirem() @@ -1816,8 +1883,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) { // by size bytes, and assists the GC if necessary. // // Caller must be preemptible. -// -// Returns the G for which the assist credit was accounted. func deductAssistCredit(size uintptr) { // Charge the current user G for this allocation. assistG := getg() @@ -1836,6 +1901,262 @@ func deductAssistCredit(size uintptr) { } } +// addAssistCredit is like deductAssistCredit, +// but adds credit rather than removes, +// and never calls gcAssistAlloc. +func addAssistCredit(size uintptr) { + // Credit the current user G. + assistG := getg() + if assistG.m.curg != nil { // TODO(thepudds): do we need to do this? + assistG = assistG.m.curg + } + // Credit the size against the G. + assistG.gcAssistBytes += int64(size) +} + +const ( + // doubleCheckReusable enables some additional invariant checks for the + // runtime.freegc and reusable objects. Note that some of these checks alter timing, + // and it is good to test changes with and without this enabled. + doubleCheckReusable = false + + // debugReusableLog enables some printlns for runtime.freegc and reusable objects. + debugReusableLog = false +) + +// freegc records that a heap object is reusable and available for +// immediate reuse in a subsequent mallocgc allocation, without +// needing to wait for the GC cycle to progress. +// +// The information is recorded in a free list stored in the +// current P's mcache. The caller must pass in the user size +// and whether the object has pointers, which allows a faster free +// operation. +// +// freegc must be called by the effective owner of ptr who knows +// the pointer is logically dead, with no possible aliases that might +// be used past that moment. In other words, ptr must be the +// last and only pointer to its referent. +// +// The intended caller is the compiler. +// +// Note: please do not send changes that attempt to add freegc calls +// to the standard library. +// +// ptr must point to a heap object or into the current g's stack, +// in which case freegc is a no-op. In particular, ptr must not point +// to memory in the data or bss sections, which is partially enforced. +// For objects with a malloc header, ptr should point mallocHeaderSize bytes +// past the base; otherwise, ptr should point to the base of the heap object. +// In other words, ptr should be the same pointer that was returned by mallocgc. +// +// In addition, the caller must know that ptr's object has no specials, such +// as might have been created by a call to SetFinalizer or AddCleanup. +// (Internally, the runtime deals appropriately with internally-created +// specials, such as specials for memory profiling). +// +// If the size of ptr's object is less than 16 bytes or greater than +// 32KiB - gc.MallocHeaderSize bytes, freegc is currently a no-op. It must only +// be called in alloc-safe places. It currently throws if noscan is false +// (support for which is implemented in a later CL in our stack). +// +// Note that freegc accepts an unsafe.Pointer and hence keeps the pointer +// alive. It therefore could be a pessimization in some cases (such +// as a long-lived function) if the caller does not call freegc before +// or roughly when the liveness analysis of the compiler +// would otherwise have determined ptr's object is reclaimable by the GC. +func freegc(ptr unsafe.Pointer, size uintptr, noscan bool) bool { + if !runtimeFreegcEnabled || sizeSpecializedMallocEnabled || !reusableSize(size) { + // TODO(thepudds): temporarily disable freegc with SizeSpecializedMalloc until we finish integrating. + return false + } + if ptr == nil { + throw("freegc nil") + } + + // Set mp.mallocing to keep from being preempted by GC. + // Otherwise, the GC could flush our mcache or otherwise cause problems. + mp := acquirem() + if mp.mallocing != 0 { + throw("freegc deadlock") + } + if mp.gsignal == getg() { + throw("freegc during signal") + } + mp.mallocing = 1 + + if mp.curg.stack.lo <= uintptr(ptr) && uintptr(ptr) < mp.curg.stack.hi { + // This points into our stack, so free is a no-op. + mp.mallocing = 0 + releasem(mp) + return false + } + + if doubleCheckReusable { + // TODO(thepudds): we could enforce no free on globals in bss or data. Maybe by + // checking span via spanOf or spanOfHeap, or maybe walk from firstmoduledata + // like isGoPointerWithoutSpan, or activeModules, or something. If so, we might + // be able to delay checking until reuse (e.g., check span just before reusing, + // though currently we don't always need to lookup a span on reuse). If we think + // no usage patterns could result in globals, maybe enforcement for globals could + // be behind -d=checkptr=1 or similar. The compiler can have knowledge of where + // a variable is allocated, but stdlib does not, although there are certain + // usage patterns that cannot result in a global. + // TODO(thepudds): separately, consider a local debugReusableMcacheOnly here + // to ignore freed objects if not in mspan in mcache, maybe when freeing and reading, + // by checking something like s.base() <= uintptr(v) && uintptr(v) < s.limit. Or + // maybe a GODEBUG or compiler debug flag. + span := spanOf(uintptr(ptr)) + if span == nil { + throw("nextReusable: nil span for pointer in free list") + } + if state := span.state.get(); state != mSpanInUse { + throw("nextReusable: span is not in use") + } + } + + if debug.clobberfree != 0 { + clobberfree(ptr, size) + } + + // We first check if p is still in our per-P cache. + // Get our per-P cache for small objects. + c := getMCache(mp) + if c == nil { + throw("freegc called without a P or outside bootstrapping") + } + + v := uintptr(ptr) + if !noscan && !heapBitsInSpan(size) { + // mallocgcSmallScanHeader expects to get the base address of the object back + // from the findReusable funcs (as well as from nextFreeFast and nextFree), and + // not mallocHeaderSize bytes into a object, so adjust that here. + v -= mallocHeaderSize + + // The size class lookup wants size to be adjusted by mallocHeaderSize. + size += mallocHeaderSize + } + + // TODO(thepudds): should verify (behind doubleCheckReusable constant) that our calculated + // sizeclass here matches what's in span found via spanOf(ptr) or findObject(ptr). + var sizeclass uint8 + if size <= gc.SmallSizeMax-8 { + sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)] + } else { + sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)] + } + + spc := makeSpanClass(sizeclass, noscan) + s := c.alloc[spc] + + if debugReusableLog { + if s.base() <= uintptr(v) && uintptr(v) < s.limit { + println("freegc [in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } else { + println("freegc [NOT in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } + } + + if noscan { + c.addReusableNoscan(spc, uintptr(v)) + } else { + // TODO(thepudds): implemented in later CL in our stack. + throw("freegc called for object with pointers, not yet implemented") + } + + // For stats, for now we leave allocCount alone, roughly pretending to the rest + // of the system that this potential reuse never happened. + + mp.mallocing = 0 + releasem(mp) + + return true +} + +// nextReusableNoScan returns the next reusable object for a noscan span, +// or 0 if no reusable object is found. +func (c *mcache) nextReusableNoScan(s *mspan, spc spanClass) (gclinkptr, *mspan) { + if !runtimeFreegcEnabled { + return 0, s + } + + // Pop a reusable pointer from the free list for this span class. + v := c.reusableNoscan[spc] + if v == 0 { + return 0, s + } + c.reusableNoscan[spc] = v.ptr().next + + if debugReusableLog { + println("reusing from ptr free list:", hex(v), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } + if doubleCheckReusable { + doubleCheckNextReusable(v) // debug only sanity check + } + + // For noscan spans, we only need the span if the write barrier is enabled (so that our caller + // can call gcmarknewobject to allocate black). If the write barrier is enabled, we can skip + // looking up the span when the pointer is in a span in the mcache. + if !writeBarrier.enabled { + return v, nil + } + if s.base() <= uintptr(v) && uintptr(v) < s.limit { + // Return the original span. + return v, s + } + + // We must find and return the span. + span := spanOf(uintptr(v)) + if span == nil { + // TODO(thepudds): construct a test that triggers this throw. + throw("nextReusableNoScan: nil span for pointer in reusable object free list") + } + + return v, span +} + +// doubleCheckNextReusable checks some invariants. +// TODO(thepudds): will probably delete some of this. Can mostly be ignored for review. +func doubleCheckNextReusable(v gclinkptr) { + // TODO(thepudds): should probably take the spanClass as well to confirm expected + // sizeclass match. + _, span, objIndex := findObject(uintptr(v), 0, 0) + if span == nil { + throw("nextReusable: nil span for pointer in free list") + } + if state := span.state.get(); state != mSpanInUse { + throw("nextReusable: span is not in use") + } + if uintptr(v) < span.base() || uintptr(v) >= span.limit { + throw("nextReusable: span is not in range") + } + if span.objBase(uintptr(v)) != uintptr(v) { + print("nextReusable: v=", hex(v), " base=", hex(span.objBase(uintptr(v))), "\n") + throw("nextReusable: v is non-base-address for object found on pointer free list") + } + if span.isFree(objIndex) { + throw("nextReusable: pointer on free list is free") + } + + const debugReusableEnsureSwept = false + if debugReusableEnsureSwept { + // Currently disabled. + // Note: ensureSwept here alters behavior (not just an invariant check). + span.ensureSwept() + if span.isFree(objIndex) { + throw("nextReusable: pointer on free list is free after ensureSwept") + } + } +} + +// reusableSize reports if size is a currently supported size for a reusable object. +func reusableSize(size uintptr) bool { + if size < maxTinySize || size > maxSmallSize-mallocHeaderSize { + return false + } + return true +} + // memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers // on chunks of the buffer to be zeroed, with opportunities for preemption // along the way. memclrNoHeapPointers contains no safepoints and also |
