Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/export_test.go |   9
-rw-r--r--  src/runtime/malloc.go      | 329
-rw-r--r--  src/runtime/malloc_test.go | 286
-rw-r--r--  src/runtime/mcache.go      |  52
-rw-r--r--  src/runtime/mheap.go       |   2
5 files changed, 672 insertions, 6 deletions
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 731ba5d6b9..48dcf5aa39 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -639,6 +639,15 @@ func RunGetgThreadSwitchTest() {
 	}
 }
 
+// Expose freegc for testing.
+func Freegc(p unsafe.Pointer, size uintptr, noscan bool) {
+	freegc(p, size, noscan)
+}
+
+const SizeSpecializedMallocEnabled = sizeSpecializedMallocEnabled
+
+const RuntimeFreegcEnabled = runtimeFreegcEnabled
+
 const (
 	PageSize         = pageSize
 	PallocChunkPages = pallocChunkPages
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index fc4f21b532..13f5fc3081 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1080,7 +1080,8 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger
 //
 // We might consider turning these on by default; many of them previously were.
 // They account for a few % of mallocgc's cost though, which does matter somewhat
-// at scale.
+// at scale. (When testing changes to malloc, consider enabling this, and also
+// some function-local 'doubleCheck' consts such as in mbitmap.go currently.)
 const doubleCheckMalloc = false
 
 // sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized
@@ -1089,6 +1090,12 @@ const doubleCheckMalloc = false
 // properly on plan9, so size-specialized malloc is also disabled on plan9.
 const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled
 
+// runtimeFreegcEnabled is the set of conditions where we enable the runtime.freegc
+// implementation and the corresponding allocation-related changes: the experiment must be
+// enabled, and none of the memory sanitizers should be enabled. We allow the race detector,
+// in contrast to sizeSpecializedMallocEnabled.
+const runtimeFreegcEnabled = goexperiment.RuntimeFreegc && !asanenabled && !msanenabled && !valgrindenabled
+
 // Allocate an object of size bytes.
 // Small objects are allocated from the per-P cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
@@ -1150,7 +1157,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 		size += asanRZ
 	}
 
-	// Assist the GC if needed.
+	// Assist the GC if needed. (On the reuse path, we currently compensate for this;
+	// changes here might require changes there.)
 	if gcBlackenEnabled != 0 {
 		deductAssistCredit(size)
 	}
@@ -1413,6 +1421,16 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
 	size = uintptr(gc.SizeClassToSize[sizeclass])
 	spc := makeSpanClass(sizeclass, true)
 	span := c.alloc[spc]
+
+	// First, check for a reusable object.
+	if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+		// We have a reusable object, use it.
+		x := mallocgcSmallNoscanReuse(c, span, spc, size, needzero)
+		mp.mallocing = 0
+		releasem(mp)
+		return x, size
+	}
+
 	v := nextFreeFast(span)
 	if v == 0 {
 		v, span, checkGCTrigger = c.nextFree(spc)
@@ -1472,6 +1490,55 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
 	return x, size
 }
 
+// mallocgcSmallNoscanReuse returns a previously freed noscan object after preparing it for reuse.
+// It must only be called if hasReusableNoscan returned true.
+func mallocgcSmallNoscanReuse(c *mcache, span *mspan, spc spanClass, size uintptr, needzero bool) unsafe.Pointer {
+	// TODO(thepudds): could nextFreeFast, nextFree and nextReusable return unsafe.Pointer?
+	// Maybe doesn't matter. gclinkptr might be for historical reasons.
+	v, span := c.nextReusableNoScan(span, spc)
+	x := unsafe.Pointer(v)
+
+	// Compensate for the GC assist credit deducted in mallocgc (before calling us and
+	// after we return) because this is not a newly allocated object. We use the full slot
+	// size (elemsize) here because that's what mallocgc deducts overall. Note we only
+	// adjust this when gcBlackenEnabled is true, which follows mallocgc behavior.
+	// TODO(thepudds): a follow-up CL adds a more specific test of our assist credit
+	// handling, including for validating internal fragmentation handling.
+	if gcBlackenEnabled != 0 {
+		addAssistCredit(size)
+	}
+
+	// This is a previously used object, so only check needzero (and not span.needzero)
+	// for clearing.
+	if needzero {
+		memclrNoHeapPointers(x, size)
+	}
+
+	// See publicationBarrier comment in mallocgcSmallNoscan.
+	publicationBarrier()
+
+	// Finish and return. Note that we do not update span.freeIndexForScan, profiling info,
+	// nor do we check gcTrigger.
+	// TODO(thepudds): the current approach is viable for a GOEXPERIMENT, but
+	// means we do not profile reused heap objects. Ultimately, we will need a better
+	// approach for profiling, or at least ensure we are not introducing bias in the
+	// profiled allocations.
+	// TODO(thepudds): related, we probably want to adjust how allocs and frees are counted
+	// in the existing stats. Currently, reused objects are not counted as allocs nor
+	// frees, but instead roughly appear as if the original heap object lived on. We
+	// probably will also want some additional runtime/metrics, and generally think about
+	// user-facing observability & diagnostics, though all this likely can wait for an
+	// official proposal.
+	if writeBarrier.enabled {
+		// Allocate black during GC.
+		// All slots hold nil so no scanning is needed.
+		// This may be racing with GC so do it atomically if there can be
+		// a race marking the bit.
+		gcmarknewobject(span, uintptr(x))
+	}
+	return x
+}
+
 func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
 	// Set mp.mallocing to keep from being preempted by GC.
 	mp := acquirem()
@@ -1816,8 +1883,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) {
 // by size bytes, and assists the GC if necessary.
 //
 // Caller must be preemptible.
-//
-// Returns the G for which the assist credit was accounted.
 func deductAssistCredit(size uintptr) {
 	// Charge the current user G for this allocation.
 	assistG := getg()
@@ -1836,6 +1901,262 @@ func deductAssistCredit(size uintptr) {
 	}
 }
 
+// addAssistCredit is like deductAssistCredit,
+// but adds credit rather than removes,
+// and never calls gcAssistAlloc.
+func addAssistCredit(size uintptr) {
+	// Credit the current user G.
+	assistG := getg()
+	if assistG.m.curg != nil { // TODO(thepudds): do we need to do this?
+		assistG = assistG.m.curg
+	}
+	// Credit the size against the G.
+	assistG.gcAssistBytes += int64(size)
+}
+
+const (
+	// doubleCheckReusable enables some additional invariant checks for the
+	// runtime.freegc and reusable objects. Note that some of these checks alter timing,
+	// and it is good to test changes with and without this enabled.
+	doubleCheckReusable = false
+
+	// debugReusableLog enables some printlns for runtime.freegc and reusable objects.
+	debugReusableLog = false
+)
+
+// freegc records that a heap object is reusable and available for
+// immediate reuse in a subsequent mallocgc allocation, without
+// needing to wait for the GC cycle to progress.
+//
+// The information is recorded in a free list stored in the
+// current P's mcache. The caller must pass in the user size
+// and whether the object has pointers, which allows a faster free
+// operation.
+//
+// freegc must be called by the effective owner of ptr who knows
+// the pointer is logically dead, with no possible aliases that might
+// be used past that moment. In other words, ptr must be the
+// last and only pointer to its referent.
+//
+// The intended caller is the compiler.
+//
+// Note: please do not send changes that attempt to add freegc calls
+// to the standard library.
+//
+// ptr must point to a heap object or into the current g's stack,
+// in which case freegc is a no-op. In particular, ptr must not point
+// to memory in the data or bss sections, which is partially enforced.
+// For objects with a malloc header, ptr should point mallocHeaderSize bytes
+// past the base; otherwise, ptr should point to the base of the heap object.
+// In other words, ptr should be the same pointer that was returned by mallocgc.
+//
+// In addition, the caller must know that ptr's object has no specials, such
+// as might have been created by a call to SetFinalizer or AddCleanup.
+// (Internally, the runtime deals appropriately with internally-created
+// specials, such as specials for memory profiling).
+//
+// If the size of ptr's object is less than 16 bytes or greater than
+// 32KiB - gc.MallocHeaderSize bytes, freegc is currently a no-op. It must only
+// be called in alloc-safe places. It currently throws if noscan is false
+// (support for which is implemented in a later CL in our stack).
+//
+// Note that freegc accepts an unsafe.Pointer and hence keeps the pointer
+// alive. It therefore could be a pessimization in some cases (such
+// as a long-lived function) if the caller does not call freegc before
+// or roughly when the liveness analysis of the compiler
+// would otherwise have determined ptr's object is reclaimable by the GC.
+func freegc(ptr unsafe.Pointer, size uintptr, noscan bool) bool {
+	if !runtimeFreegcEnabled || sizeSpecializedMallocEnabled || !reusableSize(size) {
+		// TODO(thepudds): temporarily disable freegc with SizeSpecializedMalloc until we finish integrating.
+		return false
+	}
+	if ptr == nil {
+		throw("freegc nil")
+	}
+
+	// Set mp.mallocing to keep from being preempted by GC.
+	// Otherwise, the GC could flush our mcache or otherwise cause problems.
+	mp := acquirem()
+	if mp.mallocing != 0 {
+		throw("freegc deadlock")
+	}
+	if mp.gsignal == getg() {
+		throw("freegc during signal")
+	}
+	mp.mallocing = 1
+
+	if mp.curg.stack.lo <= uintptr(ptr) && uintptr(ptr) < mp.curg.stack.hi {
+		// This points into our stack, so free is a no-op.
+		mp.mallocing = 0
+		releasem(mp)
+		return false
+	}
+
+	if doubleCheckReusable {
+		// TODO(thepudds): we could enforce no free on globals in bss or data. Maybe by
+		// checking span via spanOf or spanOfHeap, or maybe walk from firstmoduledata
+		// like isGoPointerWithoutSpan, or activeModules, or something. If so, we might
+		// be able to delay checking until reuse (e.g., check span just before reusing,
+		// though currently we don't always need to lookup a span on reuse). If we think
+		// no usage patterns could result in globals, maybe enforcement for globals could
+		// be behind -d=checkptr=1 or similar. The compiler can have knowledge of where
+		// a variable is allocated, but stdlib does not, although there are certain
+		// usage patterns that cannot result in a global.
+		// TODO(thepudds): separately, consider a local debugReusableMcacheOnly here
+		// to ignore freed objects if not in mspan in mcache, maybe when freeing and reading,
+		// by checking something like s.base() <= uintptr(v) && uintptr(v) < s.limit. Or
+		// maybe a GODEBUG or compiler debug flag.
+		span := spanOf(uintptr(ptr))
+		if span == nil {
+			throw("nextReusable: nil span for pointer in free list")
+		}
+		if state := span.state.get(); state != mSpanInUse {
+			throw("nextReusable: span is not in use")
+		}
+	}
+
+	if debug.clobberfree != 0 {
+		clobberfree(ptr, size)
+	}
+
+	// We first check if p is still in our per-P cache.
+	// Get our per-P cache for small objects.
+	c := getMCache(mp)
+	if c == nil {
+		throw("freegc called without a P or outside bootstrapping")
+	}
+
+	v := uintptr(ptr)
+	if !noscan && !heapBitsInSpan(size) {
+		// mallocgcSmallScanHeader expects to get the base address of the object back
+		// from the findReusable funcs (as well as from nextFreeFast and nextFree), and
+		// not mallocHeaderSize bytes into a object, so adjust that here.
+		v -= mallocHeaderSize
+
+		// The size class lookup wants size to be adjusted by mallocHeaderSize.
+		size += mallocHeaderSize
+	}
+
+	// TODO(thepudds): should verify (behind doubleCheckReusable constant) that our calculated
+	// sizeclass here matches what's in span found via spanOf(ptr) or findObject(ptr).
+	var sizeclass uint8
+	if size <= gc.SmallSizeMax-8 {
+		sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
+	} else {
+		sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)]
+	}
+
+	spc := makeSpanClass(sizeclass, noscan)
+	s := c.alloc[spc]
+
+	if debugReusableLog {
+		if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+			println("freegc [in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+		} else {
+			println("freegc [NOT in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+		}
+	}
+
+	if noscan {
+		c.addReusableNoscan(spc, uintptr(v))
+	} else {
+		// TODO(thepudds): implemented in later CL in our stack.
+		throw("freegc called for object with pointers, not yet implemented")
+	}
+
+	// For stats, for now we leave allocCount alone, roughly pretending to the rest
+	// of the system that this potential reuse never happened.
+
+	mp.mallocing = 0
+	releasem(mp)
+
+	return true
+}
+
+// nextReusableNoScan returns the next reusable object for a noscan span,
+// or 0 if no reusable object is found.
+func (c *mcache) nextReusableNoScan(s *mspan, spc spanClass) (gclinkptr, *mspan) {
+	if !runtimeFreegcEnabled {
+		return 0, s
+	}
+
+	// Pop a reusable pointer from the free list for this span class.
+	v := c.reusableNoscan[spc]
+	if v == 0 {
+		return 0, s
+	}
+	c.reusableNoscan[spc] = v.ptr().next
+
+	if debugReusableLog {
+		println("reusing from ptr free list:", hex(v), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+	}
+	if doubleCheckReusable {
+		doubleCheckNextReusable(v) // debug only sanity check
+	}
+
+	// For noscan spans, we only need the span if the write barrier is enabled (so that our caller
+	// can call gcmarknewobject to allocate black). If the write barrier is enabled, we can skip
+	// looking up the span when the pointer is in a span in the mcache.
+	if !writeBarrier.enabled {
+		return v, nil
+	}
+	if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+		// Return the original span.
+		return v, s
+	}
+
+	// We must find and return the span.
+	span := spanOf(uintptr(v))
+	if span == nil {
+		// TODO(thepudds): construct a test that triggers this throw.
+		throw("nextReusableNoScan: nil span for pointer in reusable object free list")
+	}
+
+	return v, span
+}
+
+// doubleCheckNextReusable checks some invariants.
+// TODO(thepudds): will probably delete some of this. Can mostly be ignored for review.
+func doubleCheckNextReusable(v gclinkptr) {
+	// TODO(thepudds): should probably take the spanClass as well to confirm expected
+	// sizeclass match.
+	_, span, objIndex := findObject(uintptr(v), 0, 0)
+	if span == nil {
+		throw("nextReusable: nil span for pointer in free list")
+	}
+	if state := span.state.get(); state != mSpanInUse {
+		throw("nextReusable: span is not in use")
+	}
+	if uintptr(v) < span.base() || uintptr(v) >= span.limit {
+		throw("nextReusable: span is not in range")
+	}
+	if span.objBase(uintptr(v)) != uintptr(v) {
+		print("nextReusable: v=", hex(v), " base=", hex(span.objBase(uintptr(v))), "\n")
+		throw("nextReusable: v is non-base-address for object found on pointer free list")
+	}
+	if span.isFree(objIndex) {
+		throw("nextReusable: pointer on free list is free")
+	}
+
+	const debugReusableEnsureSwept = false
+	if debugReusableEnsureSwept {
+		// Currently disabled.
+		// Note: ensureSwept here alters behavior (not just an invariant check).
+		span.ensureSwept()
+		if span.isFree(objIndex) {
+			throw("nextReusable: pointer on free list is free after ensureSwept")
+		}
+	}
+}
+
+// reusableSize reports if size is a currently supported size for a reusable object.
+func reusableSize(size uintptr) bool {
+	if size < maxTinySize || size > maxSmallSize-mallocHeaderSize {
+		return false
+	}
+	return true
+}
+
 // memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
 // on chunks of the buffer to be zeroed, with opportunities for preemption
 // along the way. memclrNoHeapPointers contains no safepoints and also
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index bf58947bbc..6285cdaff7 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -16,6 +16,7 @@ import (
 	"runtime"
 	. "runtime"
 	"strings"
+	"sync"
 	"sync/atomic"
 	"testing"
 	"time"
@@ -234,6 +235,275 @@ func TestTinyAllocIssue37262(t *testing.T) {
 	runtime.Releasem()
 }
 
+// TestFreegc does basic testing of explicit frees.
+func TestFreegc(t *testing.T) {
+	tests := []struct {
+		size   string
+		f      func(noscan bool) func(*testing.T)
+		noscan bool
+	}{
+		// Types without pointers.
+		{"size=16", testFreegc[[16]byte], true}, // smallest we support currently
+		{"size=17", testFreegc[[17]byte], true},
+		{"size=64", testFreegc[[64]byte], true},
+		{"size=500", testFreegc[[500]byte], true},
+		{"size=512", testFreegc[[512]byte], true},
+		{"size=4096", testFreegc[[4096]byte], true},
+		{"size=32KiB-8", testFreegc[[1<<15 - 8]byte], true}, // max noscan small object for 64-bit
+	}
+
+	// Run the tests twice if not in -short mode or not otherwise saving test time.
+	// First while manually calling runtime.GC to slightly increase isolation (perhaps making
+	// problems more reproducible).
+	for _, tt := range tests {
+		runtime.GC()
+		t.Run(fmt.Sprintf("gc=yes/ptrs=%v/%s", !tt.noscan, tt.size), tt.f(tt.noscan))
+	}
+	runtime.GC()
+
+	if testing.Short() || !RuntimeFreegcEnabled || runtime.Raceenabled {
+		return
+	}
+
+	// Again, but without manually calling runtime.GC in the loop (perhaps less isolation might
+	// trigger problems).
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("gc=no/ptrs=%v/%s", !tt.noscan, tt.size), tt.f(tt.noscan))
+	}
+	runtime.GC()
+}
+
+func testFreegc[T comparable](noscan bool) func(*testing.T) {
+	// We use stressMultiple to influence the duration of the tests.
+	// When testing freegc changes, stressMultiple can be increased locally
+	// to test longer or in some cases with more goroutines.
+	// It can also be helpful to test with GODEBUG=clobberfree=1 and
+	// with and without doubleCheckMalloc and doubleCheckReusable enabled.
+	stressMultiple := 10
+	if testing.Short() || !RuntimeFreegcEnabled || runtime.Raceenabled {
+		stressMultiple = 1
+	}
+
+	return func(t *testing.T) {
+		alloc := func() *T {
+			// Force heap alloc, plus some light validation of zeroed memory.
+			t.Helper()
+			p := Escape(new(T))
+			var zero T
+			if *p != zero {
+				t.Fatalf("allocator returned non-zero memory: %v", *p)
+			}
+			return p
+		}
+
+		free := func(p *T) {
+			t.Helper()
+			var zero T
+			if *p != zero {
+				t.Fatalf("found non-zero memory before freeing (tests do not modify memory): %v", *p)
+			}
+			runtime.Freegc(unsafe.Pointer(p), unsafe.Sizeof(*p), noscan)
+		}
+
+		t.Run("basic-free", func(t *testing.T) {
+			// Test that freeing a live heap object doesn't crash.
+			for range 100 {
+				p := alloc()
+				free(p)
+			}
+		})
+
+		t.Run("stack-free", func(t *testing.T) {
+			// Test that freeing a stack object doesn't crash.
+			for range 100 {
+				var x [32]byte
+				var y [32]*int
+				runtime.Freegc(unsafe.Pointer(&x), unsafe.Sizeof(x), true)  // noscan
+				runtime.Freegc(unsafe.Pointer(&y), unsafe.Sizeof(y), false) // !noscan
+			}
+		})
+
+		// Check our allocations. These tests rely on the
+		// current implementation treating a re-used object
+		// as not adding to the allocation counts seen
+		// by testing.AllocsPerRun. (This is not the desired
+		// long-term behavior, but it is the current behavior and
+		// makes these tests convenient).
+
+		t.Run("allocs-baseline", func(t *testing.T) {
+			// Baseline result without any explicit free.
+			allocs := testing.AllocsPerRun(100, func() {
+				for range 100 {
+					p := alloc()
+					_ = p
+				}
+			})
+			if allocs < 100 {
+				// TODO(thepudds): we get exactly 100 for almost all the tests, but investigate why
+				// ~101 allocs for TestFreegc/ptrs=true/size=32KiB-8.
+				t.Fatalf("expected >=100 allocations, got %v", allocs)
+			}
+		})
+
+		t.Run("allocs-with-free", func(t *testing.T) {
+			// Same allocations, but now using explicit free so that
+			// no allocs get reported. (Again, not the desired long-term behavior).
+			if SizeSpecializedMallocEnabled {
+				t.Skip("temporarily skipping alloc tests for GOEXPERIMENT=sizespecializedmalloc")
+			}
+			if !RuntimeFreegcEnabled {
+				t.Skip("skipping alloc tests with runtime.freegc disabled")
+			}
+			allocs := testing.AllocsPerRun(100, func() {
+				for range 100 {
+					p := alloc()
+					free(p)
+				}
+			})
+			if allocs != 0 {
+				t.Fatalf("expected 0 allocations, got %v", allocs)
+			}
+		})
+
+		t.Run("free-multiple", func(t *testing.T) {
+			// Multiple allocations outstanding before explicitly freeing,
+			// but still within the limit of our smallest free list size
+			// so that no allocs are reported. (Again, not long-term behavior).
+			if SizeSpecializedMallocEnabled {
+				t.Skip("temporarily skipping alloc tests for GOEXPERIMENT=sizespecializedmalloc")
+			}
+			if !RuntimeFreegcEnabled {
+				t.Skip("skipping alloc tests with runtime.freegc disabled")
+			}
+			const maxOutstanding = 20
+			s := make([]*T, 0, maxOutstanding)
+			allocs := testing.AllocsPerRun(100*stressMultiple, func() {
+				s = s[:0]
+				for range maxOutstanding {
+					p := alloc()
+					s = append(s, p)
+				}
+				for _, p := range s {
+					free(p)
+				}
+			})
+			if allocs != 0 {
+				t.Fatalf("expected 0 allocations, got %v", allocs)
+			}
+		})
+
+		if runtime.GOARCH == "wasm" {
+			// TODO(thepudds): for wasm, double-check if just slow, vs. some test logic problem,
+			// vs. something else. It might have been wasm was slowest with tests that spawn
+			// many goroutines, which might be expected for wasm. This skip might no longer be
+			// needed now that we have tuned test execution time more, or perhaps wasm should just
+			// always run in short mode, which might also let us remove this skip.
+			t.Skip("skipping remaining freegc tests, was timing out on wasm")
+		}
+
+		t.Run("free-many", func(t *testing.T) {
+			// Confirm we are graceful if we have more freed elements at once
+			// than the max free list size.
+			s := make([]*T, 0, 1000)
+			iterations := stressMultiple * stressMultiple // currently 1 or 100 depending on -short
+			for range iterations {
+				s = s[:0]
+				for range 1000 {
+					p := alloc()
+					s = append(s, p)
+				}
+				for _, p := range s {
+					free(p)
+				}
+			}
+		})
+
+		t.Run("duplicate-check", func(t *testing.T) {
+			// A simple duplicate allocation test. We track what should be the set
+			// of live pointers in a map across a series of allocs and frees,
+			// and fail if a live pointer value is returned by an allocation.
+			// TODO: maybe add randomness? allow more live pointers? do across goroutines?
+			live := make(map[uintptr]bool)
+			for i := range 100 * stressMultiple {
+				var s []*T
+				// Alloc 10 times, tracking the live pointer values.
+				for j := range 10 {
+					p := alloc()
+					uptr := uintptr(unsafe.Pointer(p))
+					if live[uptr] {
+						t.Fatalf("TestFreeLive: found duplicate pointer (0x%x). i: %d j: %d", uptr, i, j)
+					}
+					live[uptr] = true
+					s = append(s, p)
+				}
+				// Explicitly free those pointers, removing them from the live map.
+				for k := range s {
+					p := s[k]
+					s[k] = nil
+					uptr := uintptr(unsafe.Pointer(p))
+					free(p)
+					delete(live, uptr)
+				}
+			}
+		})
+
+		t.Run("free-other-goroutine", func(t *testing.T) {
+			// Use explicit free, but the free happens on a different goroutine than the alloc.
+			// This also lightly simulates how the free code sees P migration or flushing
+			// the mcache, assuming we have > 1 P. (Not using testing.AllocsPerRun here).
+			iterations := 10 * stressMultiple * stressMultiple // currently 10 or 1000 depending on -short
+			for _, capacity := range []int{2} {
+				for range iterations {
+					ch := make(chan *T, capacity)
+					var wg sync.WaitGroup
+					for range 2 {
+						wg.Add(1)
+						go func() {
+							defer wg.Done()
+							for p := range ch {
+								free(p)
+							}
+						}()
+					}
+					for range 100 {
+						p := alloc()
+						ch <- p
+					}
+					close(ch)
+					wg.Wait()
+				}
+			}
+		})
+
+		t.Run("many-goroutines", func(t *testing.T) {
+			// Allocate across multiple goroutines, freeing on the same goroutine.
+			// TODO: probably remove the duplicate checking here; not that useful.
+			counts := []int{1, 2, 4, 8, 10 * stressMultiple}
+			for _, goroutines := range counts {
+				var wg sync.WaitGroup
+				for range goroutines {
+					wg.Add(1)
+					go func() {
+						defer wg.Done()
+						live := make(map[uintptr]bool)
+						for range 100 * stressMultiple {
+							p := alloc()
+							uptr := uintptr(unsafe.Pointer(p))
+							if live[uptr] {
+								panic("TestFreeLive: found duplicate pointer")
+							}
+							live[uptr] = true
+							free(p)
+							delete(live, uptr)
+						}
+					}()
+				}
+				wg.Wait()
+			}
+		})
+	}
+}
+
 func TestPageCacheLeak(t *testing.T) {
 	defer GOMAXPROCS(GOMAXPROCS(1))
 	leaked := PageCachePagesLeaked()
@@ -337,6 +607,13 @@ func BenchmarkMalloc16(b *testing.B) {
 	}
 }
 
+func BenchmarkMalloc32(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		p := new([4]int64)
+		Escape(p)
+	}
+}
+
 func BenchmarkMallocTypeInfo8(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		p := new(struct {
@@ -355,6 +632,15 @@ func BenchmarkMallocTypeInfo16(b *testing.B) {
 	}
 }
 
+func BenchmarkMallocTypeInfo32(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		p := new(struct {
+			p [32 / unsafe.Sizeof(uintptr(0))]*int
+		})
+		Escape(p)
+	}
+}
+
 type LargeStruct struct {
 	x [16][]byte
 }
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index cade81031d..82872f1454 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -44,7 +44,17 @@ type mcache struct {
 
 	// The rest is not accessed on every malloc.
 
-	alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass
+	// alloc contains spans to allocate from, indexed by spanClass.
+	alloc [numSpanClasses]*mspan
+
+	// TODO(thepudds): better to interleave alloc and reusableScan/reusableNoscan so that
+	// a single malloc call can often access both in the same cache line for a given spanClass.
+	// It's not interleaved right now in part to have slightly smaller diff, and might be
+	// negligible effect on current microbenchmarks.
+
+	// reusableNoscan contains linked lists of reusable noscan heap objects, indexed by spanClass.
+	// The next pointers are stored in the first word of the heap objects.
+	reusableNoscan [numSpanClasses]gclinkptr
 
 	stackcache [_NumStackOrders]stackfreelist
 
@@ -96,6 +106,7 @@ func allocmcache() *mcache {
 		c.alloc[i] = &emptymspan
 	}
 	c.nextSample = nextSample()
+
 	return c
 }
 
@@ -153,6 +164,16 @@ func (c *mcache) refill(spc spanClass) {
 	if s.allocCount != s.nelems {
 		throw("refill of span with free space remaining")
 	}
+
+	// TODO(thepudds): we might be able to allow mallocgcTiny to reuse 16 byte objects from spc==5,
+	// but for now, just clear our reusable objects for tinySpanClass.
+	if spc == tinySpanClass {
+		c.reusableNoscan[spc] = 0
+	}
+	if c.reusableNoscan[spc] != 0 {
+		throw("refill of span with reusable pointers remaining on pointer free list")
+	}
+
 	if s != &emptymspan {
 		// Mark this span as no longer cached.
 		if s.sweepgen != mheap_.sweepgen+3 {
@@ -312,6 +333,13 @@ func (c *mcache) releaseAll() {
 	c.tinyAllocs = 0
 	memstats.heapStats.release()
 
+	// Clear the reusable linked lists.
+	// For noscan objects, the nodes of the linked lists are the reusable heap objects themselves,
+	// so we can simply clear the linked list head pointers.
+	// TODO(thepudds): consider having debug logging of a non-empty reusable lists getting cleared,
+	// maybe based on the existing debugReusableLog.
+	clear(c.reusableNoscan[:])
+
 	// Update heapLive and heapScan.
 	gcController.update(dHeapLive, scanAlloc)
 }
@@ -339,3 +367,25 @@ func (c *mcache) prepareForSweep() {
 	stackcache_clear(c)
 	c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart
 }
+
+// addReusableNoscan adds a noscan object pointer to the reusable pointer free list
+// for a span class.
+func (c *mcache) addReusableNoscan(spc spanClass, ptr uintptr) {
+	if !runtimeFreegcEnabled {
+		return
+	}
+
+	// Add to the reusable pointers free list.
+	v := gclinkptr(ptr)
+	v.ptr().next = c.reusableNoscan[spc]
+	c.reusableNoscan[spc] = v
+}
+
+// hasReusableNoscan reports whether there is a reusable object available for
+// a noscan spc.
+func (c *mcache) hasReusableNoscan(spc spanClass) bool {
+	if !runtimeFreegcEnabled {
+		return false
+	}
+	return c.reusableNoscan[spc] != 0
+}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 08a0057be7..d2ff063b00 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -435,7 +435,7 @@ type mspan struct {
 	// indicating a free object. freeindex is then adjusted so that subsequent scans begin
 	// just past the newly discovered free object.
 	//
-	// If freeindex == nelems, this span has no free objects.
+	// If freeindex == nelems, this span has no free objects, though might have reusable objects.
 	//
 	// allocBits is a bitmap of objects in this span.
 	// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
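
For readers unfamiliar with the gclinkptr-style lists used above, the standalone sketch below (hypothetical names, not code from this CL) illustrates the intrusive push/pop that addReusableNoscan and nextReusableNoScan perform on mcache.reusableNoscan: each freed object's first word is reused to hold the next link, so the per-spanClass list costs only a single head word in the mcache, and pushing or popping allocates nothing.

package main

import (
	"fmt"
	"unsafe"
)

// reuseList is a simplified, hypothetical stand-in for one mcache.reusableNoscan[spc] head.
// The freed blocks themselves act as list nodes: the first word of each block holds the
// pointer to the next freed block (an intrusive singly linked list).
type reuseList struct {
	head unsafe.Pointer
}

// push records a freed block for later reuse (roughly what addReusableNoscan does).
func (l *reuseList) push(p unsafe.Pointer) {
	*(*unsafe.Pointer)(p) = l.head // store the old head in the block's first word
	l.head = p
}

// pop returns the most recently freed block, or nil if none is available (roughly what
// nextReusableNoScan does; the runtime additionally re-zeroes the block when the caller
// needs zeroed memory and marks it during a GC cycle).
func (l *reuseList) pop() unsafe.Pointer {
	p := l.head
	if p == nil {
		return nil
	}
	l.head = *(*unsafe.Pointer)(p) // the next link was stored in the first word
	return p
}

func main() {
	var l reuseList
	// Stand-ins for two freed 64-byte noscan objects. In this toy they stay reachable
	// via the locals a and b; the real runtime handles GC visibility itself.
	a := unsafe.Pointer(new([8]uint64))
	b := unsafe.Pointer(new([8]uint64))
	l.push(a)
	l.push(b)
	fmt.Println(l.pop() == b, l.pop() == a, l.pop() == nil) // true true true
}

The LIFO order also means the most recently freed (and likely still cache-hot) object is the first candidate handed back to mallocgcSmallNoscan.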