about summary refs log tree commit diff
path: root/src/runtime/malloc.go
diff options
context:
space:
mode:
author    Cherry Mui <cherryyz@google.com>  2025-11-20 14:40:43 -0500
committer Cherry Mui <cherryyz@google.com>  2025-11-20 14:40:43 -0500
commit    e3d4645693bc030b9ff9b867f1d374a1d72ef2fe (patch)
tree      5d9c6783b4b1901e072ed253acc6ecdd909b23bc /src/runtime/malloc.go
parent    95b4ad525fc8d70c881960ab9f75f31548023bed (diff)
parent    ca37d24e0b9369b8086959df5bc230b38bf98636 (diff)
download  go-e3d4645693bc030b9ff9b867f1d374a1d72ef2fe.tar.xz
[dev.simd] all: merge master (ca37d24) into dev.simd
Conflicts: - src/cmd/compile/internal/typecheck/builtin.go Merge List: + 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn + 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode + 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration + 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns + 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append + 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world + 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two + 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix + 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64 + 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions + 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces + 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface + 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements + 2025-11-19 33529db142 spec: escape double-ampersands + 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations + 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove + 2025-11-19 2239520d1c test: go fmt prove.go tests + 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation + 2025-11-18 8c41a482f9 runtime: add dlog.hexdump + 2025-11-18 e912618bd2 runtime: add hexdumper + 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers" + 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values + 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches + 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress + 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value + 
2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64 + 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64 + 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521 + 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64 + 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more + 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points + 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable + 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r + 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls + 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024 + 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms + 2025-11-17 bc15963813 cmd/compile: clean up prove pass + 2025-11-17 1297fae708 go/token: add (*File).End method + 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline + 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear + 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption + 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall + 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c + 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects + 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256 + 2025-11-14 aea881230d std: fix printf("%q", int) mistakes + 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc + 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work + 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default + 2025-11-14 1a03d0db3f runtime: skip tests for 
GOEXPERIMENT=arenas that do not handle clobberfree=1 + 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers + 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces + 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation + 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark + 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation + 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows + 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison + 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler + 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule + 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication + 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek + 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference + 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules + 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx + 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid + 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off + 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always + 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken + 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct + 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes + 2025-11-13 77c5130100 go/types: minor simplification + 2025-11-13 7601cd3880 go/types: generate cycles.go + 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Diffstat (limited to 'src/runtime/malloc.go')
-rw-r--r--  src/runtime/malloc.go  336
1 files changed, 332 insertions, 4 deletions
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index fc4f21b532..d49dacaf68 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1080,7 +1080,8 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger
//
// We might consider turning these on by default; many of them previously were.
// They account for a few % of mallocgc's cost though, which does matter somewhat
-// at scale.
+// at scale. (When testing changes to malloc, consider enabling this, and also
+// some function-local 'doubleCheck' consts such as in mbitmap.go currently.)
const doubleCheckMalloc = false
// sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized
@@ -1089,6 +1090,14 @@ const doubleCheckMalloc = false
// properly on plan9, so size-specialized malloc is also disabled on plan9.
const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled
+// runtimeFreegcEnabled is the set of conditions where we enable the runtime.freegc
+// implementation and the corresponding allocation-related changes: the experiment must be
+// enabled, and none of the memory sanitizers should be enabled. We allow the race detector,
+// in contrast to sizeSpecializedMallocEnabled.
+// TODO(thepudds): it would be nice to check Valgrind integration, though there are some hints
+// there might not be any canned tests in tree for Go's integration with Valgrind.
+const runtimeFreegcEnabled = goexperiment.RuntimeFreegc && !asanenabled && !msanenabled && !valgrindenabled
+
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
@@ -1150,7 +1159,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
size += asanRZ
}
- // Assist the GC if needed.
+ // Assist the GC if needed. (On the reuse path, we currently compensate for this;
+ // changes here might require changes there.)
if gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
@@ -1413,6 +1423,16 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
size = uintptr(gc.SizeClassToSize[sizeclass])
spc := makeSpanClass(sizeclass, true)
span := c.alloc[spc]
+
+ // First, check for a reusable object.
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+ // We have a reusable object, use it.
+ x := mallocgcSmallNoscanReuse(c, span, spc, size, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ return x, size
+ }
+
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
@@ -1472,6 +1492,55 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
return x, size
}
+// mallocgcSmallNoscanReuse returns a previously freed noscan object after preparing it for reuse.
+// It must only be called if hasReusableNoscan returned true.
+func mallocgcSmallNoscanReuse(c *mcache, span *mspan, spc spanClass, size uintptr, needzero bool) unsafe.Pointer {
+ // TODO(thepudds): could nextFreeFast, nextFree and nextReusable return unsafe.Pointer?
+ // Maybe doesn't matter. gclinkptr might be for historical reasons.
+ v, span := c.nextReusableNoScan(span, spc)
+ x := unsafe.Pointer(v)
+
+ // Compensate for the GC assist credit deducted in mallocgc (before calling us and
+ // after we return) because this is not a newly allocated object. We use the full slot
+ // size (elemsize) here because that's what mallocgc deducts overall. Note we only
+ // adjust this when gcBlackenEnabled is true, which follows mallocgc behavior.
+ // TODO(thepudds): a follow-up CL adds a more specific test of our assist credit
+ // handling, including for validating internal fragmentation handling.
+ if gcBlackenEnabled != 0 {
+ addAssistCredit(size)
+ }
+
+ // This is a previously used object, so only check needzero (and not span.needzero)
+ // for clearing.
+ if needzero {
+ memclrNoHeapPointers(x, size)
+ }
+
+ // See publicationBarrier comment in mallocgcSmallNoscan.
+ publicationBarrier()
+
+ // Finish and return. Note that we do not update span.freeIndexForScan, profiling info,
+ // nor do we check gcTrigger.
+ // TODO(thepudds): the current approach is viable for a GOEXPERIMENT, but
+ // means we do not profile reused heap objects. Ultimately, we will need a better
+ // approach for profiling, or at least ensure we are not introducing bias in the
+ // profiled allocations.
+ // TODO(thepudds): related, we probably want to adjust how allocs and frees are counted
+ // in the existing stats. Currently, reused objects are not counted as allocs nor
+ // frees, but instead roughly appear as if the original heap object lived on. We
+ // probably will also want some additional runtime/metrics, and generally think about
+ // user-facing observability & diagnostics, though all this likely can wait for an
+ // official proposal.
+ if writeBarrier.enabled {
+ // Allocate black during GC.
+ // All slots hold nil so no scanning is needed.
+ // This may be racing with GC so do it atomically if there can be
+ // a race marking the bit.
+ gcmarknewobject(span, uintptr(x))
+ }
+ return x
+}
+
func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
@@ -1816,8 +1885,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) {
// by size bytes, and assists the GC if necessary.
//
// Caller must be preemptible.
-//
-// Returns the G for which the assist credit was accounted.
func deductAssistCredit(size uintptr) {
// Charge the current user G for this allocation.
assistG := getg()
@@ -1836,6 +1903,267 @@ func deductAssistCredit(size uintptr) {
}
}
+// addAssistCredit is like deductAssistCredit,
+// but adds credit rather than removes,
+// and never calls gcAssistAlloc.
+func addAssistCredit(size uintptr) {
+ // Credit the current user G.
+ assistG := getg()
+ if assistG.m.curg != nil { // TODO(thepudds): do we need to do this?
+ assistG = assistG.m.curg
+ }
+ // Credit the size against the G.
+ assistG.gcAssistBytes += int64(size)
+}
+
+const (
+ // doubleCheckReusable enables some additional invariant checks for the
+ // runtime.freegc and reusable objects. Note that some of these checks alter timing,
+ // and it is good to test changes with and without this enabled.
+ doubleCheckReusable = false
+
+ // debugReusableLog enables some printlns for runtime.freegc and reusable objects.
+ debugReusableLog = false
+)
+
+// freegc records that a heap object is reusable and available for
+// immediate reuse in a subsequent mallocgc allocation, without
+// needing to wait for the GC cycle to progress.
+//
+// The information is recorded in a free list stored in the
+// current P's mcache. The caller must pass in the user size
+// and whether the object has pointers, which allows a faster free
+// operation.
+//
+// freegc must be called by the effective owner of ptr who knows
+// the pointer is logically dead, with no possible aliases that might
+// be used past that moment. In other words, ptr must be the
+// last and only pointer to its referent.
+//
+// The intended caller is the compiler.
+//
+// Note: please do not send changes that attempt to add freegc calls
+// to the standard library.
+//
+// ptr must point to a heap object or into the current g's stack,
+// in which case freegc is a no-op. In particular, ptr must not point
+// to memory in the data or bss sections, which is partially enforced.
+// For objects with a malloc header, ptr should point mallocHeaderSize bytes
+// past the base; otherwise, ptr should point to the base of the heap object.
+// In other words, ptr should be the same pointer that was returned by mallocgc.
+//
+// In addition, the caller must know that ptr's object has no specials, such
+// as might have been created by a call to SetFinalizer or AddCleanup.
+// (Internally, the runtime deals appropriately with internally-created
+// specials, such as specials for memory profiling).
+//
+// If the size of ptr's object is less than 16 bytes or greater than
+// 32KiB - gc.MallocHeaderSize bytes, freegc is currently a no-op. It must only
+// be called in alloc-safe places. It currently throws if noscan is false
+// (support for which is implemented in a later CL in our stack).
+//
+// Note that freegc accepts an unsafe.Pointer and hence keeps the pointer
+// alive. It therefore could be a pessimization in some cases (such
+// as a long-lived function) if the caller does not call freegc before
+// or roughly when the liveness analysis of the compiler
+// would otherwise have determined ptr's object is reclaimable by the GC.
+func freegc(ptr unsafe.Pointer, size uintptr, noscan bool) bool {
+ if !runtimeFreegcEnabled || !reusableSize(size) {
+ return false
+ }
+ if sizeSpecializedMallocEnabled && !noscan {
+ // TODO(thepudds): temporarily disable freegc with SizeSpecializedMalloc for pointer types
+ // until we finish integrating.
+ return false
+ }
+
+ if ptr == nil {
+ throw("freegc nil")
+ }
+
+ // Set mp.mallocing to keep from being preempted by GC.
+ // Otherwise, the GC could flush our mcache or otherwise cause problems.
+ mp := acquirem()
+ if mp.mallocing != 0 {
+ throw("freegc deadlock")
+ }
+ if mp.gsignal == getg() {
+ throw("freegc during signal")
+ }
+ mp.mallocing = 1
+
+ if mp.curg.stack.lo <= uintptr(ptr) && uintptr(ptr) < mp.curg.stack.hi {
+ // This points into our stack, so free is a no-op.
+ mp.mallocing = 0
+ releasem(mp)
+ return false
+ }
+
+ if doubleCheckReusable {
+ // TODO(thepudds): we could enforce no free on globals in bss or data. Maybe by
+ // checking span via spanOf or spanOfHeap, or maybe walk from firstmoduledata
+ // like isGoPointerWithoutSpan, or activeModules, or something. If so, we might
+ // be able to delay checking until reuse (e.g., check span just before reusing,
+ // though currently we don't always need to lookup a span on reuse). If we think
+ // no usage patterns could result in globals, maybe enforcement for globals could
+ // be behind -d=checkptr=1 or similar. The compiler can have knowledge of where
+ // a variable is allocated, but stdlib does not, although there are certain
+ // usage patterns that cannot result in a global.
+ // TODO(thepudds): separately, consider a local debugReusableMcacheOnly here
+ // to ignore freed objects if not in mspan in mcache, maybe when freeing and reading,
+ // by checking something like s.base() <= uintptr(v) && uintptr(v) < s.limit. Or
+ // maybe a GODEBUG or compiler debug flag.
+ span := spanOf(uintptr(ptr))
+ if span == nil {
+ throw("nextReusable: nil span for pointer in free list")
+ }
+ if state := span.state.get(); state != mSpanInUse {
+ throw("nextReusable: span is not in use")
+ }
+ }
+
+ if debug.clobberfree != 0 {
+ clobberfree(ptr, size)
+ }
+
+ // We first check if p is still in our per-P cache.
+ // Get our per-P cache for small objects.
+ c := getMCache(mp)
+ if c == nil {
+ throw("freegc called without a P or outside bootstrapping")
+ }
+
+ v := uintptr(ptr)
+ if !noscan && !heapBitsInSpan(size) {
+ // mallocgcSmallScanHeader expects to get the base address of the object back
+ // from the findReusable funcs (as well as from nextFreeFast and nextFree), and
+ // not mallocHeaderSize bytes into a object, so adjust that here.
+ v -= mallocHeaderSize
+
+ // The size class lookup wants size to be adjusted by mallocHeaderSize.
+ size += mallocHeaderSize
+ }
+
+ // TODO(thepudds): should verify (behind doubleCheckReusable constant) that our calculated
+ // sizeclass here matches what's in span found via spanOf(ptr) or findObject(ptr).
+ var sizeclass uint8
+ if size <= gc.SmallSizeMax-8 {
+ sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
+ } else {
+ sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)]
+ }
+
+ spc := makeSpanClass(sizeclass, noscan)
+ s := c.alloc[spc]
+
+ if debugReusableLog {
+ if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+ println("freegc [in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ } else {
+ println("freegc [NOT in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ }
+ }
+
+ if noscan {
+ c.addReusableNoscan(spc, uintptr(v))
+ } else {
+ // TODO(thepudds): implemented in later CL in our stack.
+ throw("freegc called for object with pointers, not yet implemented")
+ }
+
+ // For stats, for now we leave allocCount alone, roughly pretending to the rest
+ // of the system that this potential reuse never happened.
+
+ mp.mallocing = 0
+ releasem(mp)
+
+ return true
+}
+
+// nextReusableNoScan returns the next reusable object for a noscan span,
+// or 0 if no reusable object is found.
+func (c *mcache) nextReusableNoScan(s *mspan, spc spanClass) (gclinkptr, *mspan) {
+ if !runtimeFreegcEnabled {
+ return 0, s
+ }
+
+ // Pop a reusable pointer from the free list for this span class.
+ v := c.reusableNoscan[spc]
+ if v == 0 {
+ return 0, s
+ }
+ c.reusableNoscan[spc] = v.ptr().next
+
+ if debugReusableLog {
+ println("reusing from ptr free list:", hex(v), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ }
+ if doubleCheckReusable {
+ doubleCheckNextReusable(v) // debug only sanity check
+ }
+
+ // For noscan spans, we only need the span if the write barrier is enabled (so that our caller
+ // can call gcmarknewobject to allocate black). If the write barrier is enabled, we can skip
+ // looking up the span when the pointer is in a span in the mcache.
+ if !writeBarrier.enabled {
+ return v, nil
+ }
+ if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+ // Return the original span.
+ return v, s
+ }
+
+ // We must find and return the span.
+ span := spanOf(uintptr(v))
+ if span == nil {
+ // TODO(thepudds): construct a test that triggers this throw.
+ throw("nextReusableNoScan: nil span for pointer in reusable object free list")
+ }
+
+ return v, span
+}
+
+// doubleCheckNextReusable checks some invariants.
+// TODO(thepudds): will probably delete some of this. Can mostly be ignored for review.
+func doubleCheckNextReusable(v gclinkptr) {
+ // TODO(thepudds): should probably take the spanClass as well to confirm expected
+ // sizeclass match.
+ _, span, objIndex := findObject(uintptr(v), 0, 0)
+ if span == nil {
+ throw("nextReusable: nil span for pointer in free list")
+ }
+ if state := span.state.get(); state != mSpanInUse {
+ throw("nextReusable: span is not in use")
+ }
+ if uintptr(v) < span.base() || uintptr(v) >= span.limit {
+ throw("nextReusable: span is not in range")
+ }
+ if span.objBase(uintptr(v)) != uintptr(v) {
+ print("nextReusable: v=", hex(v), " base=", hex(span.objBase(uintptr(v))), "\n")
+ throw("nextReusable: v is non-base-address for object found on pointer free list")
+ }
+ if span.isFree(objIndex) {
+ throw("nextReusable: pointer on free list is free")
+ }
+
+ const debugReusableEnsureSwept = false
+ if debugReusableEnsureSwept {
+ // Currently disabled.
+ // Note: ensureSwept here alters behavior (not just an invariant check).
+ span.ensureSwept()
+ if span.isFree(objIndex) {
+ throw("nextReusable: pointer on free list is free after ensureSwept")
+ }
+ }
+}
+
+// reusableSize reports if size is a currently supported size for a reusable object.
+func reusableSize(size uintptr) bool {
+ if size < maxTinySize || size > maxSmallSize-mallocHeaderSize {
+ return false
+ }
+ return true
+}
+
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also