diff options
| author | Cherry Mui <cherryyz@google.com> | 2025-11-20 14:40:43 -0500 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2025-11-20 14:40:43 -0500 |
| commit | e3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch) | |
| tree | 5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/runtime/malloc.go | |
| parent | 95b4ad525fc8d70c881960ab9f75f31548023bed (diff) | |
| parent | ca37d24e0b9369b8086959df5bc230b38bf98636 (diff) | |
| download | go-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz | |
[dev.simd] all: merge master (ca37d24) into dev.simd
Conflicts:
- src/cmd/compile/internal/typecheck/builtin.go
Merge List:
+ 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn
+ 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode
+ 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration
+ 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns
+ 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append
+ 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world
+ 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two
+ 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix
+ 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64
+ 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions
+ 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces
+ 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface
+ 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements
+ 2025-11-19 33529db142 spec: escape double-ampersands
+ 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations
+ 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove
+ 2025-11-19 2239520d1c test: go fmt prove.go tests
+ 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation
+ 2025-11-18 8c41a482f9 runtime: add dlog.hexdump
+ 2025-11-18 e912618bd2 runtime: add hexdumper
+ 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers"
+ 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values
+ 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches
+ 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress
+ 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value
+ 2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64
+ 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64
+ 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521
+ 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64
+ 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more
+ 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points
+ 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable
+ 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r
+ 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls
+ 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024
+ 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms
+ 2025-11-17 bc15963813 cmd/compile: clean up prove pass
+ 2025-11-17 1297fae708 go/token: add (*File).End method
+ 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline
+ 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear
+ 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption
+ 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall
+ 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c
+ 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects
+ 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256
+ 2025-11-14 aea881230d std: fix printf("%q", int) mistakes
+ 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc
+ 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work
+ 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default
+ 2025-11-14 1a03d0db3f runtime: skip tests for GOEXPERIMENT=arenas that do not handle clobberfree=1
+ 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers
+ 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces
+ 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation
+ 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark
+ 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation
+ 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows
+ 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison
+ 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler
+ 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule
+ 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication
+ 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek
+ 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference
+ 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules
+ 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx
+ 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid
+ 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off
+ 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always
+ 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken
+ 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct
+ 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes
+ 2025-11-13 77c5130100 go/types: minor simplification
+ 2025-11-13 7601cd3880 go/types: generate cycles.go
+ 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs
Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Diffstat (limited to 'src/runtime/malloc.go')
| -rw-r--r-- | src/runtime/malloc.go | 336 |
1 files changed, 332 insertions, 4 deletions
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index fc4f21b532..d49dacaf68 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -1080,7 +1080,8 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger // // We might consider turning these on by default; many of them previously were. // They account for a few % of mallocgc's cost though, which does matter somewhat -// at scale. +// at scale. (When testing changes to malloc, consider enabling this, and also +// some function-local 'doubleCheck' consts such as in mbitmap.go currently.) const doubleCheckMalloc = false // sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized @@ -1089,6 +1090,14 @@ const doubleCheckMalloc = false // properly on plan9, so size-specialized malloc is also disabled on plan9. const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled +// runtimeFreegcEnabled is the set of conditions where we enable the runtime.freegc +// implementation and the corresponding allocation-related changes: the experiment must be +// enabled, and none of the memory sanitizers should be enabled. We allow the race detector, +// in contrast to sizeSpecializedMallocEnabled. +// TODO(thepudds): it would be nice to check Valgrind integration, though there are some hints +// there might not be any canned tests in tree for Go's integration with Valgrind. +const runtimeFreegcEnabled = goexperiment.RuntimeFreegc && !asanenabled && !msanenabled && !valgrindenabled + // Allocate an object of size bytes. // Small objects are allocated from the per-P cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. @@ -1150,7 +1159,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { size += asanRZ } - // Assist the GC if needed. + // Assist the GC if needed. 
(On the reuse path, we currently compensate for this; + // changes here might require changes there.) if gcBlackenEnabled != 0 { deductAssistCredit(size) } @@ -1413,6 +1423,16 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe size = uintptr(gc.SizeClassToSize[sizeclass]) spc := makeSpanClass(sizeclass, true) span := c.alloc[spc] + + // First, check for a reusable object. + if runtimeFreegcEnabled && c.hasReusableNoscan(spc) { + // We have a reusable object, use it. + x := mallocgcSmallNoscanReuse(c, span, spc, size, needzero) + mp.mallocing = 0 + releasem(mp) + return x, size + } + v := nextFreeFast(span) if v == 0 { v, span, checkGCTrigger = c.nextFree(spc) @@ -1472,6 +1492,55 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe return x, size } +// mallocgcSmallNoscanReuse returns a previously freed noscan object after preparing it for reuse. +// It must only be called if hasReusableNoscan returned true. +func mallocgcSmallNoscanReuse(c *mcache, span *mspan, spc spanClass, size uintptr, needzero bool) unsafe.Pointer { + // TODO(thepudds): could nextFreeFast, nextFree and nextReusable return unsafe.Pointer? + // Maybe doesn't matter. gclinkptr might be for historical reasons. + v, span := c.nextReusableNoScan(span, spc) + x := unsafe.Pointer(v) + + // Compensate for the GC assist credit deducted in mallocgc (before calling us and + // after we return) because this is not a newly allocated object. We use the full slot + // size (elemsize) here because that's what mallocgc deducts overall. Note we only + // adjust this when gcBlackenEnabled is true, which follows mallocgc behavior. + // TODO(thepudds): a follow-up CL adds a more specific test of our assist credit + // handling, including for validating internal fragmentation handling. + if gcBlackenEnabled != 0 { + addAssistCredit(size) + } + + // This is a previously used object, so only check needzero (and not span.needzero) + // for clearing. 
+ if needzero { + memclrNoHeapPointers(x, size) + } + + // See publicationBarrier comment in mallocgcSmallNoscan. + publicationBarrier() + + // Finish and return. Note that we do not update span.freeIndexForScan, profiling info, + // nor do we check gcTrigger. + // TODO(thepudds): the current approach is viable for a GOEXPERIMENT, but + // means we do not profile reused heap objects. Ultimately, we will need a better + // approach for profiling, or at least ensure we are not introducing bias in the + // profiled allocations. + // TODO(thepudds): related, we probably want to adjust how allocs and frees are counted + // in the existing stats. Currently, reused objects are not counted as allocs nor + // frees, but instead roughly appear as if the original heap object lived on. We + // probably will also want some additional runtime/metrics, and generally think about + // user-facing observability & diagnostics, though all this likely can wait for an + // official proposal. + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } + return x +} + func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) { // Set mp.mallocing to keep from being preempted by GC. mp := acquirem() @@ -1816,8 +1885,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) { // by size bytes, and assists the GC if necessary. // // Caller must be preemptible. -// -// Returns the G for which the assist credit was accounted. func deductAssistCredit(size uintptr) { // Charge the current user G for this allocation. assistG := getg() @@ -1836,6 +1903,267 @@ func deductAssistCredit(size uintptr) { } } +// addAssistCredit is like deductAssistCredit, +// but adds credit rather than removes, +// and never calls gcAssistAlloc. 
+func addAssistCredit(size uintptr) { + // Credit the current user G. + assistG := getg() + if assistG.m.curg != nil { // TODO(thepudds): do we need to do this? + assistG = assistG.m.curg + } + // Credit the size against the G. + assistG.gcAssistBytes += int64(size) +} + +const ( + // doubleCheckReusable enables some additional invariant checks for the + // runtime.freegc and reusable objects. Note that some of these checks alter timing, + // and it is good to test changes with and without this enabled. + doubleCheckReusable = false + + // debugReusableLog enables some printlns for runtime.freegc and reusable objects. + debugReusableLog = false +) + +// freegc records that a heap object is reusable and available for +// immediate reuse in a subsequent mallocgc allocation, without +// needing to wait for the GC cycle to progress. +// +// The information is recorded in a free list stored in the +// current P's mcache. The caller must pass in the user size +// and whether the object has pointers, which allows a faster free +// operation. +// +// freegc must be called by the effective owner of ptr who knows +// the pointer is logically dead, with no possible aliases that might +// be used past that moment. In other words, ptr must be the +// last and only pointer to its referent. +// +// The intended caller is the compiler. +// +// Note: please do not send changes that attempt to add freegc calls +// to the standard library. +// +// ptr must point to a heap object or into the current g's stack, +// in which case freegc is a no-op. In particular, ptr must not point +// to memory in the data or bss sections, which is partially enforced. +// For objects with a malloc header, ptr should point mallocHeaderSize bytes +// past the base; otherwise, ptr should point to the base of the heap object. +// In other words, ptr should be the same pointer that was returned by mallocgc. 
+// +// In addition, the caller must know that ptr's object has no specials, such +// as might have been created by a call to SetFinalizer or AddCleanup. +// (Internally, the runtime deals appropriately with internally-created +// specials, such as specials for memory profiling). +// +// If the size of ptr's object is less than 16 bytes or greater than +// 32KiB - gc.MallocHeaderSize bytes, freegc is currently a no-op. It must only +// be called in alloc-safe places. It currently throws if noscan is false +// (support for which is implemented in a later CL in our stack). +// +// Note that freegc accepts an unsafe.Pointer and hence keeps the pointer +// alive. It therefore could be a pessimization in some cases (such +// as a long-lived function) if the caller does not call freegc before +// or roughly when the liveness analysis of the compiler +// would otherwise have determined ptr's object is reclaimable by the GC. +func freegc(ptr unsafe.Pointer, size uintptr, noscan bool) bool { + if !runtimeFreegcEnabled || !reusableSize(size) { + return false + } + if sizeSpecializedMallocEnabled && !noscan { + // TODO(thepudds): temporarily disable freegc with SizeSpecializedMalloc for pointer types + // until we finish integrating. + return false + } + + if ptr == nil { + throw("freegc nil") + } + + // Set mp.mallocing to keep from being preempted by GC. + // Otherwise, the GC could flush our mcache or otherwise cause problems. + mp := acquirem() + if mp.mallocing != 0 { + throw("freegc deadlock") + } + if mp.gsignal == getg() { + throw("freegc during signal") + } + mp.mallocing = 1 + + if mp.curg.stack.lo <= uintptr(ptr) && uintptr(ptr) < mp.curg.stack.hi { + // This points into our stack, so free is a no-op. + mp.mallocing = 0 + releasem(mp) + return false + } + + if doubleCheckReusable { + // TODO(thepudds): we could enforce no free on globals in bss or data. 
Maybe by + // checking span via spanOf or spanOfHeap, or maybe walk from firstmoduledata + // like isGoPointerWithoutSpan, or activeModules, or something. If so, we might + // be able to delay checking until reuse (e.g., check span just before reusing, + // though currently we don't always need to lookup a span on reuse). If we think + // no usage patterns could result in globals, maybe enforcement for globals could + // be behind -d=checkptr=1 or similar. The compiler can have knowledge of where + // a variable is allocated, but stdlib does not, although there are certain + // usage patterns that cannot result in a global. + // TODO(thepudds): separately, consider a local debugReusableMcacheOnly here + // to ignore freed objects if not in mspan in mcache, maybe when freeing and reading, + // by checking something like s.base() <= uintptr(v) && uintptr(v) < s.limit. Or + // maybe a GODEBUG or compiler debug flag. + span := spanOf(uintptr(ptr)) + if span == nil { + throw("nextReusable: nil span for pointer in free list") + } + if state := span.state.get(); state != mSpanInUse { + throw("nextReusable: span is not in use") + } + } + + if debug.clobberfree != 0 { + clobberfree(ptr, size) + } + + // We first check if ptr is still in our per-P cache. + // Get our per-P cache for small objects. + c := getMCache(mp) + if c == nil { + throw("freegc called without a P or outside bootstrapping") + } + + v := uintptr(ptr) + if !noscan && !heapBitsInSpan(size) { + // mallocgcSmallScanHeader expects to get the base address of the object back + // from the findReusable funcs (as well as from nextFreeFast and nextFree), and + // not mallocHeaderSize bytes into an object, so adjust that here. + v -= mallocHeaderSize + + // The size class lookup wants size to be adjusted by mallocHeaderSize. 
+ size += mallocHeaderSize + } + + // TODO(thepudds): should verify (behind doubleCheckReusable constant) that our calculated + // sizeclass here matches what's in span found via spanOf(ptr) or findObject(ptr). + var sizeclass uint8 + if size <= gc.SmallSizeMax-8 { + sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)] + } else { + sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)] + } + + spc := makeSpanClass(sizeclass, noscan) + s := c.alloc[spc] + + if debugReusableLog { + if s.base() <= uintptr(v) && uintptr(v) < s.limit { + println("freegc [in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } else { + println("freegc [NOT in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } + } + + if noscan { + c.addReusableNoscan(spc, uintptr(v)) + } else { + // TODO(thepudds): implemented in later CL in our stack. + throw("freegc called for object with pointers, not yet implemented") + } + + // For stats, for now we leave allocCount alone, roughly pretending to the rest + // of the system that this potential reuse never happened. + + mp.mallocing = 0 + releasem(mp) + + return true +} + +// nextReusableNoScan returns the next reusable object for a noscan span, +// or 0 if no reusable object is found. +func (c *mcache) nextReusableNoScan(s *mspan, spc spanClass) (gclinkptr, *mspan) { + if !runtimeFreegcEnabled { + return 0, s + } + + // Pop a reusable pointer from the free list for this span class. 
+ v := c.reusableNoscan[spc] + if v == 0 { + return 0, s + } + c.reusableNoscan[spc] = v.ptr().next + + if debugReusableLog { + println("reusing from ptr free list:", hex(v), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled) + } + if doubleCheckReusable { + doubleCheckNextReusable(v) // debug only sanity check + } + + // For noscan spans, we only need the span if the write barrier is enabled (so that our caller + // can call gcmarknewobject to allocate black). If the write barrier is enabled, we can skip + // looking up the span when the pointer is in a span in the mcache. + if !writeBarrier.enabled { + return v, nil + } + if s.base() <= uintptr(v) && uintptr(v) < s.limit { + // Return the original span. + return v, s + } + + // We must find and return the span. + span := spanOf(uintptr(v)) + if span == nil { + // TODO(thepudds): construct a test that triggers this throw. + throw("nextReusableNoScan: nil span for pointer in reusable object free list") + } + + return v, span +} + +// doubleCheckNextReusable checks some invariants. +// TODO(thepudds): will probably delete some of this. Can mostly be ignored for review. +func doubleCheckNextReusable(v gclinkptr) { + // TODO(thepudds): should probably take the spanClass as well to confirm expected + // sizeclass match. 
+ _, span, objIndex := findObject(uintptr(v), 0, 0) + if span == nil { + throw("nextReusable: nil span for pointer in free list") + } + if state := span.state.get(); state != mSpanInUse { + throw("nextReusable: span is not in use") + } + if uintptr(v) < span.base() || uintptr(v) >= span.limit { + throw("nextReusable: span is not in range") + } + if span.objBase(uintptr(v)) != uintptr(v) { + print("nextReusable: v=", hex(v), " base=", hex(span.objBase(uintptr(v))), "\n") + throw("nextReusable: v is non-base-address for object found on pointer free list") + } + if span.isFree(objIndex) { + throw("nextReusable: pointer on free list is free") + } + + const debugReusableEnsureSwept = false + if debugReusableEnsureSwept { + // Currently disabled. + // Note: ensureSwept here alters behavior (not just an invariant check). + span.ensureSwept() + if span.isFree(objIndex) { + throw("nextReusable: pointer on free list is free after ensureSwept") + } + } +} + +// reusableSize reports if size is a currently supported size for a reusable object. +func reusableSize(size uintptr) bool { + if size < maxTinySize || size > maxSmallSize-mallocHeaderSize { + return false + } + return true +} + // memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers // on chunks of the buffer to be zeroed, with opportunities for preemption // along the way. memclrNoHeapPointers contains no safepoints and also |
