From 7e0251bf584c5fe79e95b9c460c7d60a7199d0ae Mon Sep 17 00:00:00 2001 From: Damien Neil Date: Mon, 22 Sep 2025 15:15:40 -0700 Subject: runtime: don't report non-blocked goroutines as "(durable)" in stacks Only append the " (durable)" suffix to a goroutine's status when the goroutine is waiting. Avoids reporting a goroutine as "runnable (durable)". Change-Id: Id679692345afab6e63362ca3eeff16808367e50f Reviewed-on: https://go-review.googlesource.com/c/go/+/705995 LUCI-TryBot-Result: Go LUCI Auto-Submit: Damien Neil Reviewed-by: Michael Pratt --- src/runtime/traceback.go | 1 + 1 file changed, 1 insertion(+) (limited to 'src/runtime') diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00c0f08e55..00eac59201 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -1249,6 +1249,7 @@ func goroutineheader(gp *g) { print(" (scan)") } if bubble := gp.bubble; bubble != nil && + gpstatus == _Gwaiting && gp.waitreason.isIdleInSynctest() && !stringslite.HasSuffix(status, "(durable)") { // If this isn't a status where the name includes a (durable) -- cgit v1.3-5-g9baa From 5d040df09271ad2f1b0f93abf94a1b2efc8871df Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Thu, 10 Apr 2025 16:36:04 +0000 Subject: runtime: use scan kernels in scanSpan [green tea] This is an extra 15-20% faster over the current sparse span scanning when AVX512+GFNI is available and there's sufficient density. For #73581. 
Change-Id: I9688e09885dd76c5ccab7c492c85a7e14e18ee04 Reviewed-on: https://go-review.googlesource.com/c/go/+/665495 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Auto-Submit: Michael Knyszek --- src/runtime/mgcmark_greenteagc.go | 69 ++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go index 845857a817..a45845b476 100644 --- a/src/runtime/mgcmark_greenteagc.go +++ b/src/runtime/mgcmark_greenteagc.go @@ -41,6 +41,7 @@ import ( "internal/goarch" "internal/runtime/atomic" "internal/runtime/gc" + "internal/runtime/gc/scan" "internal/runtime/sys" "unsafe" ) @@ -259,7 +260,7 @@ func gcUsesSpanInlineMarkBits(size uintptr) bool { return heapBitsInSpan(size) && size >= 16 } -// tryQueueOnSpan tries to queue p on the span it points to, if it +// tryDeferToSpanScan tries to queue p on the span it points to, if it // points to a small object span (gcUsesSpanQueue size). func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { if useCheckmark { @@ -608,8 +609,7 @@ func scanSpan(p objptr, gcw *gcWork) { atomic.Or8(bytep, mask) gcw.bytesMarked += uint64(elemsize) if debug.gctrace > 1 { - gcw.stats[spanclass.sizeclass()].spansSparseScanned++ - gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++ + gcw.stats[spanclass.sizeclass()].sparseObjsScanned++ } b := spanBase + uintptr(objIndex)*elemsize scanObjectSmall(spanBase, b, elemsize, gcw) @@ -631,11 +631,47 @@ func scanSpan(p objptr, gcw *gcWork) { return } gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize) + + // Check if we have enough density to make a dartboard scan + // worthwhile. If not, just do what scanobject does, but + // localized to the span, using the dartboard. 
+ if !scan.HasFastScanSpanPacked() || objsMarked < int(nelems/8) { + if debug.gctrace > 1 { + gcw.stats[spanclass.sizeclass()].spansSparseScanned++ + gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned += uint64(objsMarked) + } + scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan) + return + } + + // Scan the span. + // + // N.B. Use gcw.ptrBuf as the output buffer. This is a bit different + // from scanObjectsSmall, which puts addresses to dereference. ScanSpanPacked + // on the other hand, fills gcw.ptrBuf with already dereferenced pointers. + nptrs := scan.ScanSpanPacked( + unsafe.Pointer(spanBase), + &gcw.ptrBuf[0], + &toScan, + uintptr(spanclass.sizeclass()), + spanPtrMaskUnsafe(spanBase), + ) + gcw.heapScanWork += int64(objsMarked) * int64(elemsize) + if debug.gctrace > 1 { + // Write down some statistics. gcw.stats[spanclass.sizeclass()].spansDenseScanned++ gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked) } - scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan) + + // Process all the pointers we just got. + for _, p := range gcw.ptrBuf[:nptrs] { + if !tryDeferToSpanScan(p, gcw) { + if obj, span, objIndex := findObject(p, 0, 0); obj != 0 { + greyobject(obj, 0, 0, span, gcw, objIndex) + } + } + } } // spanSetScans sets any unset mark bits that have their mark bits set in the inline mark bits. @@ -798,12 +834,27 @@ func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr { return read } +// spanPtrMaskUnsafe returns the pointer mask for a span with inline mark bits. +// +// The caller must ensure spanBase is the base of a span that: +// - 1 page in size, +// - Uses inline mark bits, +// - Contains pointers. 
+func spanPtrMaskUnsafe(spanBase uintptr) *gc.PtrMask { + base := spanBase + gc.PageSize - unsafe.Sizeof(gc.PtrMask{}) - unsafe.Sizeof(spanInlineMarkBits{}) + return (*gc.PtrMask)(unsafe.Pointer(base)) +} + type sizeClassScanStats struct { - spansDenseScanned uint64 - spanObjsDenseScanned uint64 - spansSparseScanned uint64 - spanObjsSparseScanned uint64 - sparseObjsScanned uint64 + spansDenseScanned uint64 // Spans scanned with ScanSpanPacked. + spanObjsDenseScanned uint64 // Objects scanned with ScanSpanPacked. + spansSparseScanned uint64 // Spans scanned with scanObjectsSmall. + spanObjsSparseScanned uint64 // Objects scanned with scanObjectsSmall. + sparseObjsScanned uint64 // Objects scanned with scanobject or scanObjectSmall. + // Note: sparseObjsScanned is sufficient for both cases because + // a particular size class either uses scanobject or scanObjectSmall, + // not both. In the latter case, we also know that there was one + // object scanned per span, so no need for a span counter. } func dumpScanStats() { -- cgit v1.3-5-g9baa From fde10c4ce7f3b32acd886992450dd94cafb699a4 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 5 Sep 2025 17:39:09 +0000 Subject: runtime: split gcMarkWorkAvailable into two separate conditions Right now, gcMarkWorkAvailable is used in two scenarios. The first is critical: we use it to determine whether we're approaching mark termination, and it's crucial to reaching a fixed point across the ragged barrier in gcMarkDone. The second is a heuristic: should we spin up another GC worker? This change splits gcMarkWorkAvailable into these two separate conditions. This change also deduplicates the logic for updating work.nwait into more abstract helpers "gcBeginWork" and "gcEndWork." This change is solely refactoring, and should be a no-op. There are only two functional changes: - work.nwait is incremented after setting pp.gcMarkWorkerMode in the background worker code. 
I don't believe this change is observable except if the code fails to update work.nwait (either it results in a non-sensical number, or the stack is corrupted) in which case the goroutine may not be labeled as a mark worker in the resulting stack trace (it should be obvious from the stack frames though). - endCheckmarks also checks work.nwait == work.nproc, which should be fine since we never mutate work.nwait on that path. That extra check should be a no-op. Splitting these two use-cases for gcMarkWorkAvailable is conceptually helpful, and the checks may also diverge from Green Tea once we get rid of the global span queue. Change-Id: I0bec244a14ee82919c4deb7c1575589c0dca1089 Reviewed-on: https://go-review.googlesource.com/c/go/+/701176 Reviewed-by: Michael Pratt Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI --- src/runtime/mcheckmark.go | 2 +- src/runtime/mgc.go | 65 ++++++++++++++++++++++++++++++----------------- src/runtime/mgcmark.go | 15 ++--------- src/runtime/mgcpacer.go | 4 +-- src/runtime/proc.go | 6 ++--- 5 files changed, 50 insertions(+), 42 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go index 318f40f2eb..083220f449 100644 --- a/src/runtime/mcheckmark.go +++ b/src/runtime/mcheckmark.go @@ -68,7 +68,7 @@ func startCheckmarks() { // endCheckmarks ends the checkmarks phase. func endCheckmarks() { - if gcMarkWorkAvailable(nil) { + if !gcIsMarkDone() { throw("GC work not flushed") } useCheckmark = false diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 26cec37f74..efefa09475 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -869,10 +869,11 @@ var gcDebugMarkDone struct { // all local work to the global queues where it can be discovered by // other workers. // +// All goroutines performing GC work must call gcBeginWork to signal +// that they're executing GC work. They must call gcEndWork when done. 
// This should be called when all local mark work has been drained and -// there are no remaining workers. Specifically, when -// -// work.nwait == work.nproc && !gcMarkWorkAvailable(p) +// there are no remaining workers. Specifically, when gcEndWork returns +// true. // // The calling context must be preemptible. // @@ -896,7 +897,7 @@ top: // empty before performing the ragged barrier. Otherwise, // there could be global work that a P could take after the P // has passed the ragged barrier. - if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) { + if !(gcphase == _GCmark && gcIsMarkDone()) { semrelease(&work.markDoneSema) return } @@ -1514,11 +1515,7 @@ func gcBgMarkWorker(ready chan struct{}) { trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime) } - decnwait := atomic.Xadd(&work.nwait, -1) - if decnwait == work.nproc { - println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) - throw("work.nwait was > work.nproc") - } + gcBeginWork() systemstack(func() { // Mark our goroutine preemptible so its stack can be scanned or observed @@ -1570,15 +1567,6 @@ func gcBgMarkWorker(ready chan struct{}) { atomic.Xaddint64(&pp.gcFractionalMarkTime, duration) } - // Was this the last worker and did we run out - // of work? - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode, - "work.nwait=", incnwait, "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - // We'll releasem after this point and thus this P may run // something else. We must clear the worker mode to avoid // attributing the mode to a different (non-worker) G in @@ -1587,7 +1575,7 @@ func gcBgMarkWorker(ready chan struct{}) { // If this worker reached a background mark completion // point, signal the main GC goroutine. 
- if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + if gcEndWork() { // We don't need the P-local buffers here, allow // preemption because we may schedule like a regular // goroutine in gcMarkDone (block on locks, etc). @@ -1599,13 +1587,44 @@ func gcBgMarkWorker(ready chan struct{}) { } } -// gcMarkWorkAvailable reports whether executing a mark worker -// on p is potentially useful. p may be nil, in which case it only -// checks the global sources of work. -func gcMarkWorkAvailable(p *p) bool { +// gcShouldScheduleWorker reports whether executing a mark worker +// on p is potentially useful. p may be nil. +func gcShouldScheduleWorker(p *p) bool { if p != nil && !p.gcw.empty() { return true } + return gcMarkWorkAvailable() +} + +// gcIsMarkDone reports whether the mark phase is (probably) done. +func gcIsMarkDone() bool { + return work.nwait == work.nproc && !gcMarkWorkAvailable() +} + +// gcBeginWork signals to the garbage collector that a new worker is +// about to process GC work. +func gcBeginWork() { + decnwait := atomic.Xadd(&work.nwait, -1) + if decnwait == work.nproc { + println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc) + throw("work.nwait was > work.nproc") + } +} + +// gcEndWork signals to the garbage collector that a new worker has just finished +// its work. It reports whether it was the last worker and there's no more work +// to do. If it returns true, the caller must call gcMarkDone. +func gcEndWork() (last bool) { + incnwait := atomic.Xadd(&work.nwait, +1) + if incnwait > work.nproc { + println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc) + throw("work.nwait > work.nproc") + } + return incnwait == work.nproc && !gcMarkWorkAvailable() +} + +// gcMarkWorkAvailable reports whether there's any non-local work available to do. 
+func gcMarkWorkAvailable() bool { if !work.full.empty() || !work.spanq.empty() { return true // global work available } diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 8b306045c5..b8542383db 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -675,11 +675,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { startTime := nanotime() trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime) - decnwait := atomic.Xadd(&work.nwait, -1) - if decnwait == work.nproc { - println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc) - throw("nwait > work.nprocs") - } + gcBeginWork() // gcDrainN requires the caller to be preemptible. casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking) @@ -702,14 +698,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { // If this is the last worker and we ran out of work, // signal a completion point. - incnwait := atomic.Xadd(&work.nwait, +1) - if incnwait > work.nproc { - println("runtime: work.nwait=", incnwait, - "work.nproc=", work.nproc) - throw("work.nwait > work.nproc") - } - - if incnwait == work.nproc && !gcMarkWorkAvailable(nil) { + if gcEndWork() { // This has reached a background completion point. Set // gp.param to a non-nil value to indicate this. It // doesn't matter what we set it to (it just has to be diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index 044792d6bd..f637ba96b6 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -767,8 +767,8 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) { gcCPULimiter.update(now) } - if !gcMarkWorkAvailable(pp) { - // No work to be done right now. This can happen at + if !gcShouldScheduleWorker(pp) { + // No good reason to schedule a worker. This can happen at // the end of the mark phase when there are still // assists tapering off. Don't bother running a worker // now because it'll just return immediately. 
diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 51e2c42605..91e1653c7c 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3125,7 +3125,7 @@ func handoffp(pp *p) { return } // if it has GC work, start it straight away - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) { + if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) { startm(pp, false, false) return } @@ -3506,7 +3506,7 @@ top: // // If we're in the GC mark phase, can safely scan and blacken objects, // and have work to do, run idle-time marking rather than give up the P. - if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() { + if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() { node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) if node != nil { pp.gcMarkWorkerMode = gcMarkWorkerIdleMode @@ -3913,7 +3913,7 @@ func checkIdleGCNoP() (*p, *g) { if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { return nil, nil } - if !gcMarkWorkAvailable(nil) { + if !gcShouldScheduleWorker(nil) { return nil, nil } -- cgit v1.3-5-g9baa From 69e74b0aacc1de59b618bbb9789a2e7e0cd806b5 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 3 Sep 2025 19:47:28 +0000 Subject: runtime: deduplicate pMask resize code Change-Id: I04a9a69904710a488c685cb9eee9c3313ed8e97b Reviewed-on: https://go-review.googlesource.com/c/go/+/701896 Reviewed-by: Michael Pratt Auto-Submit: Michael Knyszek LUCI-TryBot-Result: Go LUCI --- src/runtime/proc.go | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 91e1653c7c..4154dcd76e 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -5873,8 +5873,6 @@ func procresize(nprocs int32) *p { } sched.procresizetime = now - maskWords := (nprocs + 31) / 32 - // Grow allp if necessary. 
if nprocs > int32(len(allp)) { // Synchronize with retake, which could be running @@ -5890,19 +5888,8 @@ func procresize(nprocs int32) *p { allp = nallp } - if maskWords <= int32(cap(idlepMask)) { - idlepMask = idlepMask[:maskWords] - timerpMask = timerpMask[:maskWords] - } else { - nidlepMask := make([]uint32, maskWords) - // No need to copy beyond len, old Ps are irrelevant. - copy(nidlepMask, idlepMask) - idlepMask = nidlepMask - - ntimerpMask := make([]uint32, maskWords) - copy(ntimerpMask, timerpMask) - timerpMask = ntimerpMask - } + idlepMask = idlepMask.resize(nprocs) + timerpMask = timerpMask.resize(nprocs) unlock(&allpLock) } @@ -5965,8 +5952,8 @@ func procresize(nprocs int32) *p { if int32(len(allp)) != nprocs { lock(&allpLock) allp = allp[:nprocs] - idlepMask = idlepMask[:maskWords] - timerpMask = timerpMask[:maskWords] + idlepMask = idlepMask.resize(nprocs) + timerpMask = timerpMask.resize(nprocs) unlock(&allpLock) } @@ -6905,6 +6892,22 @@ func (p pMask) clear(id int32) { atomic.And(&p[word], ^mask) } +// resize resizes the pMask and returns a new one. +// +// The result may alias p, so callers are encouraged to +// discard p. Not safe for concurrent use. +func (p pMask) resize(nprocs int32) pMask { + maskWords := (nprocs + 31) / 32 + + if maskWords <= int32(cap(p)) { + return p[:maskWords] + } + newMask := make([]uint32, maskWords) + // No need to copy beyond len, old Ps are irrelevant. + copy(newMask, p) + return newMask +} + // pidleput puts p on the _Pidle list. now must be a relatively recent call // to nanotime or zero. Returns now or the current time if now was zero. 
// -- cgit v1.3-5-g9baa From e93f439ac4160baf9992f059d2bfb511e23f63c9 Mon Sep 17 00:00:00 2001 From: qmuntal Date: Tue, 23 Sep 2025 10:46:46 +0200 Subject: runtime/cgo: retry when CreateThread fails with ERROR_ACCESS_DENIED _cgo_beginthread used to retry _beginthread only when it failed with EACCESS, but CL 651995 switched to CreateThread and incorrectly mapped EACCESS to ERROR_NOT_ENOUGH_MEMORY. The correct mapping is ERROR_ACCESS_DENIED. Fixes #72814 Fixes #75381 Change-Id: I8ba060114aae4e8249576f11a21eff613caa8001 Reviewed-on: https://go-review.googlesource.com/c/go/+/706075 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Knyszek --- src/runtime/cgo/gcc_libinit_windows.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/runtime') diff --git a/src/runtime/cgo/gcc_libinit_windows.c b/src/runtime/cgo/gcc_libinit_windows.c index 83fc874348..9275185d6e 100644 --- a/src/runtime/cgo/gcc_libinit_windows.c +++ b/src/runtime/cgo/gcc_libinit_windows.c @@ -145,7 +145,7 @@ void _cgo_beginthread(unsigned long (__stdcall *func)(void*), void* arg) { for (tries = 0; tries < 20; tries++) { thandle = CreateThread(NULL, 0, func, arg, 0, NULL); - if (thandle == 0 && GetLastError() == ERROR_NOT_ENOUGH_MEMORY) { + if (thandle == 0 && GetLastError() == ERROR_ACCESS_DENIED) { // "Insufficient resources", try again in a bit. // // Note that the first Sleep(0) is a yield. -- cgit v1.3-5-g9baa From d7a38adf4c81f0fa83203e37844192182b22680a Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Fri, 15 Aug 2025 17:09:05 +0000 Subject: runtime: eliminate global span queue [green tea] This change removes the locked global span queue and replaces the fixed-size local span queue with a variable-sized local span queue. The variable-sized local span queue grows as needed to accomodate local work. With no global span queue either, GC workers balance work amongst themselves by stealing from each other. 
The new variable-sized local span queues are inspired by the P-local deque underlying sync.Pool. Unlike the sync.Pool deque, however, both the owning P and stealing Ps take spans from the tail, making this incarnation a strict queue, not a deque. This is intentional, since we want a queue-like order to encourage objects to accumulate on each span. These variable-sized local span queues are crucial to mark termination, just like the global span queue was. To avoid hitting the ragged barrier too often, we must check whether any Ps have any spans on their variable-sized local span queues. We maintain a per-P atomic bitmask (another pMask) that contains this state. We can also use this to speed up stealing by skipping Ps that don't have any local spans. The variable-sized local span queues are slower than the old fixed-size local span queues because of the additional indirection, so this change adds a non-atomic local fixed-size queue. This risks getting work stuck on it, so, similarly to how workbufs work, each worker will occasionally dump some spans onto its local variable-sized queue. This scales much more nicely than dumping to a global queue, but is still visible to all other Ps. For #73581. 
Change-Id: I814f54d9c3cc7fa7896167746e9823f50943ac22 Reviewed-on: https://go-review.googlesource.com/c/go/+/700496 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI --- src/runtime/export_test.go | 24 -- src/runtime/gc_test.go | 193 ------------ src/runtime/mgc.go | 43 +-- src/runtime/mgcmark.go | 49 +-- src/runtime/mgcmark_greenteagc.go | 597 +++++++++++++++++++++++++----------- src/runtime/mgcmark_nogreenteagc.go | 36 ++- src/runtime/mgcpacer.go | 2 +- src/runtime/mgcsweep.go | 4 + src/runtime/mgcwork.go | 22 +- src/runtime/mheap.go | 96 +----- src/runtime/proc.go | 13 + 11 files changed, 546 insertions(+), 533 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 607281d382..2a70111568 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1289,30 +1289,6 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int { return result } -type MSpanQueue mSpanQueue - -func (q *MSpanQueue) Size() int { - return (*mSpanQueue)(q).n -} - -func (q *MSpanQueue) Push(s *MSpan) { - (*mSpanQueue)(q).push((*mspan)(s)) -} - -func (q *MSpanQueue) Pop() *MSpan { - s := (*mSpanQueue)(q).pop() - return (*MSpan)(s) -} - -func (q *MSpanQueue) TakeAll(p *MSpanQueue) { - (*mSpanQueue)(q).takeAll((*mSpanQueue)(p)) -} - -func (q *MSpanQueue) PopN(n int) MSpanQueue { - p := (*mSpanQueue)(q).popN(n) - return (MSpanQueue)(p) -} - const ( TimeHistSubBucketBits = timeHistSubBucketBits TimeHistNumSubBuckets = timeHistNumSubBuckets diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 0a1e01cbcf..be4d3451f3 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -881,199 +881,6 @@ func TestWeakToStrongMarkTermination(t *testing.T) { } } -func TestMSpanQueue(t *testing.T) { - expectSize := func(t *testing.T, q *runtime.MSpanQueue, want int) { - t.Helper() - if got := q.Size(); got != want { - t.Errorf("expected size %d, got %d", want, got) - } - } - expectMSpan := func(t *testing.T, got, want 
*runtime.MSpan, op string) { - t.Helper() - if got != want { - t.Errorf("expected mspan %p from %s, got %p", want, op, got) - } - } - makeSpans := func(t *testing.T, n int) ([]*runtime.MSpan, func()) { - t.Helper() - spans := make([]*runtime.MSpan, 0, n) - for range cap(spans) { - spans = append(spans, runtime.AllocMSpan()) - } - return spans, func() { - for i, s := range spans { - runtime.FreeMSpan(s) - spans[i] = nil - } - } - } - t.Run("Empty", func(t *testing.T) { - var q runtime.MSpanQueue - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPop", func(t *testing.T) { - s := runtime.AllocMSpan() - defer runtime.FreeMSpan(s) - - var q runtime.MSpanQueue - q.Push(s) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPopPushPop", func(t *testing.T) { - s0 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s0) - s1 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s1) - - var q runtime.MSpanQueue - - // Push and pop s0. - q.Push(s0) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s0, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - - // Push and pop s1. 
- q.Push(s1) - expectSize(t, &q, 1) - expectMSpan(t, q.Pop(), s1, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("PushPushPopPop", func(t *testing.T) { - s0 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s0) - s1 := runtime.AllocMSpan() - defer runtime.FreeMSpan(s1) - - var q runtime.MSpanQueue - q.Push(s0) - expectSize(t, &q, 1) - q.Push(s1) - expectSize(t, &q, 2) - expectMSpan(t, q.Pop(), s0, "pop") - expectMSpan(t, q.Pop(), s1, "pop") - expectMSpan(t, q.Pop(), nil, "pop") - }) - t.Run("EmptyTakeAll", func(t *testing.T) { - var q runtime.MSpanQueue - var p runtime.MSpanQueue - expectSize(t, &p, 0) - expectSize(t, &q, 0) - p.TakeAll(&q) - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4TakeAll", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - - var p runtime.MSpanQueue - p.TakeAll(&q) - expectSize(t, &p, 4) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop3", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(3) - expectSize(t, &p, 3) - expectSize(t, &q, 1) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectMSpan(t, q.Pop(), spans[len(spans)-1], "pop") - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop0", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := 
q.PopN(0) - expectSize(t, &p, 0) - expectSize(t, &q, 4) - for i := range q.Size() { - expectMSpan(t, q.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectSize(t, &q, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop4", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(4) - expectSize(t, &p, 4) - expectSize(t, &q, 0) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) - t.Run("Push4Pop5", func(t *testing.T) { - spans, free := makeSpans(t, 4) - defer free() - - var q runtime.MSpanQueue - for i, s := range spans { - expectSize(t, &q, i) - q.Push(s) - expectSize(t, &q, i+1) - } - p := q.PopN(5) - expectSize(t, &p, 4) - expectSize(t, &q, 0) - for i := range p.Size() { - expectMSpan(t, p.Pop(), spans[i], "pop") - } - expectSize(t, &p, 0) - expectMSpan(t, q.Pop(), nil, "pop") - expectMSpan(t, p.Pop(), nil, "pop") - }) -} - func TestDetectFinalizerAndCleanupLeaks(t *testing.T) { got := runTestProg(t, "testprog", "DetectFinalizerAndCleanupLeaks", "GODEBUG=checkfinalizers=1") sp := strings.SplitN(got, "detected possible issues with cleanups and/or finalizers", 2) diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index efefa09475..68cbfda500 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -326,7 +326,7 @@ type workType struct { full lfstack // lock-free list of full blocks workbuf _ cpu.CacheLinePad // prevents false-sharing between full and empty empty lfstack // lock-free list of empty blocks workbuf - _ cpu.CacheLinePad // prevents false-sharing between empty and nproc/nwait + _ cpu.CacheLinePad // prevents false-sharing between empty and wbufSpans wbufSpans struct { lock mutex @@ -337,12 +337,24 @@ type workType struct { 
// one of the workbuf lists. busy mSpanList } - _ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanq + _ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanWorkMask - // Global queue of spans to scan. + // spanqMask is a bitmap indicating which Ps have local work worth stealing. + // Set or cleared by the owning P, cleared by stealing Ps. + // + // spanqMask is like a proxy for a global queue. An important invariant is that + // forced flushing like gcw.dispose must set this bit on any P that has local + // span work. + spanqMask pMask + _ cpu.CacheLinePad // prevents false-sharing between spanqMask and everything else + + // List of all spanSPMCs. // // Only used if goexperiment.GreenTeaGC. - spanq spanQueue + spanSPMCs struct { + lock mutex // no lock rank because it's a leaf lock (see mklockrank.go). + all *spanSPMC + } // Restore 64-bit alignment on 32-bit. // _ uint32 @@ -711,8 +723,9 @@ func gcStart(trigger gcTrigger) { traceRelease(trace) } - // Check that all Ps have finished deferred mcache flushes. + // Check and setup per-P state. for _, p := range allp { + // Check that all Ps have finished deferred mcache flushes. if fg := p.mcache.flushGen.Load(); fg != mheap_.sweepgen { println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen) throw("p mcache not flushed") @@ -923,6 +936,7 @@ top: // TODO(austin): Break up these workbufs to // better distribute work. pp.gcw.dispose() + // Collect the flushedWork flag. if pp.gcw.flushedWork { atomic.Xadd(&gcMarkDoneFlushed, 1) @@ -1623,17 +1637,6 @@ func gcEndWork() (last bool) { return incnwait == work.nproc && !gcMarkWorkAvailable() } -// gcMarkWorkAvailable reports whether there's any non-local work available to do. 
-func gcMarkWorkAvailable() bool { - if !work.full.empty() || !work.spanq.empty() { - return true // global work available - } - if work.markrootNext < work.markrootJobs { - return true // root scan work available - } - return false -} - // gcMark runs the mark (or, for concurrent GC, mark termination) // All gcWork caches must be empty. // STW is in effect at this point. @@ -1644,8 +1647,8 @@ func gcMark(startTime int64) { work.tstart = startTime // Check that there's no marking work remaining. - if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() { - print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n") + if work.full != 0 || work.markrootNext < work.markrootJobs { + print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n") panic("non-empty mark queue after concurrent mark") } @@ -1761,10 +1764,12 @@ func gcSweep(mode gcMode) bool { // Sweep all spans eagerly. for sweepone() != ^uintptr(0) { } - // Free workbufs eagerly. + // Free workbufs and span rings eagerly. prepareFreeWorkbufs() for freeSomeWbufs(false) { } + for freeSomeSpanSPMCs(false) { + } // All "free" events for this mark/sweep cycle have // now happened, so we can make this profile cycle // available immediately. diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index b8542383db..f85ebda260 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -666,6 +666,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) { gp.gcAssistBytes = 0 return } + // Track time spent in this assist. 
Since we're on the // system stack, this is non-preemptible, so we can // just measure start and end time. @@ -1231,14 +1232,18 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) { var b uintptr var s objptr if b = gcw.tryGetObjFast(); b == 0 { - if s = gcw.tryGetSpan(false); s == 0 { + if s = gcw.tryGetSpanFast(); s == 0 { if b = gcw.tryGetObj(); b == 0 { - // Flush the write barrier - // buffer; this may create - // more work. - wbBufFlush() - if b = gcw.tryGetObj(); b == 0 { - s = gcw.tryGetSpan(true) + if s = gcw.tryGetSpan(); s == 0 { + // Flush the write barrier + // buffer; this may create + // more work. + wbBufFlush() + if b = gcw.tryGetObj(); b == 0 { + if s = gcw.tryGetSpan(); s == 0 { + s = gcw.tryStealSpan() + } + } } } } @@ -1327,22 +1332,26 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { var b uintptr var s objptr if b = gcw.tryGetObjFast(); b == 0 { - if s = gcw.tryGetSpan(false); s == 0 { + if s = gcw.tryGetSpanFast(); s == 0 { if b = gcw.tryGetObj(); b == 0 { - // Flush the write barrier - // buffer; this may create - // more work. - wbBufFlush() - if b = gcw.tryGetObj(); b == 0 { - // Try to do a root job. - if work.markrootNext < work.markrootJobs { - job := atomic.Xadd(&work.markrootNext, +1) - 1 - if job < work.markrootJobs { - workFlushed += markroot(gcw, job, false) - continue + if s = gcw.tryGetSpan(); s == 0 { + // Flush the write barrier + // buffer; this may create + // more work. + wbBufFlush() + if b = gcw.tryGetObj(); b == 0 { + if s = gcw.tryGetSpan(); s == 0 { + // Try to do a root job. 
+ if work.markrootNext < work.markrootJobs { + job := atomic.Xadd(&work.markrootNext, +1) - 1 + if job < work.markrootJobs { + workFlushed += markroot(gcw, job, false) + continue + } + } + s = gcw.tryStealSpan() } } - s = gcw.tryGetSpan(true) } } } diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go index a45845b476..53fcd3d966 100644 --- a/src/runtime/mgcmark_greenteagc.go +++ b/src/runtime/mgcmark_greenteagc.go @@ -37,7 +37,6 @@ package runtime import ( - "internal/cpu" "internal/goarch" "internal/runtime/atomic" "internal/runtime/gc" @@ -300,6 +299,12 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { if q.tryAcquire() { if gcw.spanq.put(makeObjPtr(base, objIndex)) { if gcphase == _GCmark { + // This is intentionally racy; the bit set here might get + // stomped on by a stealing P. See the comment in tryStealSpan + // for an explanation as to why this is OK. + if !work.spanqMask.read(uint32(gcw.id)) { + work.spanqMask.set(gcw.id) + } gcw.mayNeedWorker = true } gcw.flushedWork = true @@ -308,260 +313,487 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool { return true } +// tryGetSpanFast attempts to get an entire span to scan. +func (w *gcWork) tryGetSpanFast() objptr { + return w.spanq.tryGetFast() +} + // tryGetSpan attempts to get an entire span to scan. -func (w *gcWork) tryGetSpan(slow bool) objptr { - if s := w.spanq.get(); s != 0 { +func (w *gcWork) tryGetSpan() objptr { + if s := w.spanq.tryGetFast(); s != 0 { return s } - - if slow { - // Check the global span queue. - if s := work.spanq.get(w); s != 0 { - return s - } - - // Attempt to steal spans to scan from other Ps. - return spanQueueSteal(w) + // "Steal" from ourselves. + if s := w.spanq.steal(&w.spanq); s != 0 { + return s + } + // We failed to get any local work, so we're fresh out. + // Nobody else is going to add work for us. Clear our bit. 
+ if work.spanqMask.read(uint32(w.id)) { + work.spanqMask.clear(w.id) } return 0 } -// spanQueue is a concurrent safe queue of mspans. Each mspan is represented -// as an objptr whose spanBase is the base address of the span. +// spanQueue is a P-local stealable span queue. type spanQueue struct { - avail atomic.Bool // optimization to check emptiness w/o the lock - _ cpu.CacheLinePad // prevents false-sharing between lock and avail - lock mutex - q mSpanQueue -} - -func (q *spanQueue) empty() bool { - return !q.avail.Load() -} + // head, tail, and ring represent a local non-thread-safe ring buffer. + head, tail uint32 + ring [256]objptr -func (q *spanQueue) size() int { - return q.q.n -} + // putsSinceDrain counts the number of put calls since the last drain. + putsSinceDrain int -// putBatch adds a whole batch of spans to the queue. -func (q *spanQueue) putBatch(batch []objptr) { - var list mSpanQueue - for _, p := range batch { - s := spanOfUnchecked(p.spanBase()) - s.scanIdx = p.objIndex() - list.push(s) + // chain contains state visible to other Ps. + // + // In particular, that means a linked chain of single-producer multi-consumer + // ring buffers where the single producer is this P only. + // + // This linked chain structure is based off the sync.Pool dequeue. + chain struct { + // head is the spanSPMC to put to. This is only accessed + // by the producer, so doesn't need to be synchronized. + head *spanSPMC + + // tail is the spanSPMC to steal from. This is accessed + // by consumers, so reads and writes must be atomic. + tail atomic.UnsafePointer // *spanSPMC } +} - lock(&q.lock) - if q.q.n == 0 { - q.avail.Store(true) +// putFast tries to put s onto the queue, but may fail if it's full. +func (q *spanQueue) putFast(s objptr) (ok bool) { + if q.tail-q.head == uint32(len(q.ring)) { + return false } - q.q.takeAll(&list) - unlock(&q.lock) + q.ring[q.tail%uint32(len(q.ring))] = s + q.tail++ + return true } -// get tries to take a span off the queue. 
+// put puts s onto the queue. // -// Returns a non-zero objptr on success. Also, moves additional -// spans to gcw's local span queue. -func (q *spanQueue) get(gcw *gcWork) objptr { - if q.empty() { - return 0 - } - lock(&q.lock) - if q.q.n == 0 { - unlock(&q.lock) - return 0 - } - n := q.q.n/int(gomaxprocs) + 1 - if n > q.q.n { - n = q.q.n - } - if max := len(gcw.spanq.ring) / 2; n > max { - n = max +// Returns whether the caller should spin up a new worker. +func (q *spanQueue) put(s objptr) bool { + // The constants below define the period of and volume of + // spans we spill to the spmc chain when the local queue is + // not full. + // + // spillPeriod must be > spillMax, otherwise that sets the + // effective maximum size of our local span queue. Even if + // we have a span ring of size N, but we flush K spans every + // K puts, then K becomes our effective maximum length. When + // spillPeriod > spillMax, then we're always spilling spans + // at a slower rate than we're accumulating them. + const ( + // spillPeriod defines how often to check if we should + // spill some spans, counted in the number of calls to put. + spillPeriod = 64 + + // spillMax defines, at most, how many spans to drain with + // each spill. + spillMax = 16 + ) + + if q.putFast(s) { + // Occasionally try to spill some work to generate parallelism. + q.putsSinceDrain++ + if q.putsSinceDrain >= spillPeriod { + // Reset even if we don't drain, so we don't check every time. + q.putsSinceDrain = 0 + + // Try to drain some spans. Don't bother if there's very + // few of them or there's already spans in the spmc chain. + n := min((q.tail-q.head)/2, spillMax) + if n > 4 && q.chainEmpty() { + q.drain(n) + return true + } + } + return false } - newQ := q.q.popN(n) - if q.q.n == 0 { - q.avail.Store(false) + + // We're out of space. Drain out our local spans. 
+ q.drain(uint32(len(q.ring)) / 2) + if !q.putFast(s) { + throw("failed putFast after drain") } - unlock(&q.lock) + return true +} - s := newQ.pop() - for newQ.n > 0 { - s := newQ.pop() - gcw.spanq.put(makeObjPtr(s.base(), s.scanIdx)) +// flush publishes all spans in the local queue to the spmc chain. +func (q *spanQueue) flush() { + n := q.tail - q.head + if n == 0 { + return } - return makeObjPtr(s.base(), s.scanIdx) + q.drain(n) } -// localSpanQueue is a P-local ring buffer of objptrs that represent spans. -// Accessed without a lock. +// empty returns true if there's no more work on the queue. // -// Multi-consumer, single-producer. The only producer is the P that owns this -// queue, but any other P may consume from it. -// -// This is based on the scheduler runqueues. If making changes there, consider -// also making them here. -type localSpanQueue struct { - head atomic.Uint32 - tail atomic.Uint32 - ring [256]objptr +// Not thread-safe. Must only be called by the owner of q. +func (q *spanQueue) empty() bool { + // Check the local queue for work. + if q.tail-q.head > 0 { + return false + } + return q.chainEmpty() } -// put adds s to the queue. Returns true if put flushed to the global queue -// because it was full. -func (q *localSpanQueue) put(s objptr) (flushed bool) { - for { - h := q.head.Load() // synchronize with consumers - t := q.tail.Load() - if t-h < uint32(len(q.ring)) { - q.ring[t%uint32(len(q.ring))] = s - q.tail.Store(t + 1) // Makes the item avail for consumption. +// chainEmpty returns true if the spmc chain is empty. +// +// Thread-safe. +func (q *spanQueue) chainEmpty() bool { + // Check the rest of the rings for work. + r := (*spanSPMC)(q.chain.tail.Load()) + for r != nil { + if !r.empty() { return false } - if q.putSlow(s, h, t) { - return true - } - // The queue is not full, now the put above must succeed. + r = (*spanSPMC)(r.prev.Load()) } + return true } -// putSlow is a helper for put to move spans to the global queue. 
-// Returns true on success, false on failure (nothing moved). -func (q *localSpanQueue) putSlow(s objptr, h, t uint32) bool { - var batch [len(q.ring)/2 + 1]objptr +// drain publishes n spans from the local queue to the spmc chain. +func (q *spanQueue) drain(n uint32) { + q.putsSinceDrain = 0 + + if q.chain.head == nil { + // N.B. We target 1024, but this may be bigger if the physical + // page size is bigger, or if we can fit more uintptrs into a + // physical page. See newSpanSPMC docs. + r := newSpanSPMC(1024) + q.chain.head = r + q.chain.tail.StoreNoWB(unsafe.Pointer(r)) + } + + // Try to drain some of the queue to the head spmc. + if q.tryDrain(q.chain.head, n) { + return + } + // No space. Create a bigger spmc and add it to the chain. - // First, grab a batch from local queue. - n := t - h - n = n / 2 - if n != uint32(len(q.ring)/2) { - throw("localSpanQueue.putSlow: queue is not full") + // Double the size of the next one, up to a maximum. + // + // We double each time so we can avoid taking this slow path + // in the future, which involves a global lock. Ideally we want + // to hit a steady-state where the deepest any queue goes during + // a mark phase can fit in the ring. + // + // However, we still set a maximum on this. We set the maximum + // to something large to amortize the cost of lock acquisition, but + // still at a reasonable size for big heaps and/or a lot of Ps (which + // tend to be correlated). + // + // It's not too bad to burn relatively large-but-fixed amounts of per-P + // memory if we need to deal with really, really deep queues, since the + // constants of proportionality are small. Simultaneously, we want to + // avoid a situation where a single worker ends up queuing O(heap) + // work and then forever retains a queue of that size. 
+ const maxCap = 1 << 20 / goarch.PtrSize + newCap := q.chain.head.cap * 2 + if newCap > maxCap { + newCap = maxCap } - for i := uint32(0); i < n; i++ { - batch[i] = q.ring[(h+i)%uint32(len(q.ring))] + newHead := newSpanSPMC(newCap) + if !q.tryDrain(newHead, n) { + throw("failed to put span on newly-allocated spanSPMC") } - if !q.head.CompareAndSwap(h, h+n) { // Commits consume. - return false + q.chain.head.prev.StoreNoWB(unsafe.Pointer(newHead)) + q.chain.head = newHead +} + +// tryDrain attempts to drain n spans from q's local queue to the chain. +// +// Returns whether it succeeded. +func (q *spanQueue) tryDrain(r *spanSPMC, n uint32) bool { + if q.head+n > q.tail { + throw("attempt to drain too many elements") } - batch[n] = s + h := r.head.Load() // synchronize with consumers + t := r.tail.Load() + rn := t - h + if rn+n <= r.cap { + for i := uint32(0); i < n; i++ { + *r.slot(t + i) = q.ring[(q.head+i)%uint32(len(q.ring))] + } + r.tail.Store(t + n) // Makes the items avail for consumption. + q.head += n + return true + } + return false +} - work.spanq.putBatch(batch[:]) - return true +// tryGetFast attempts to get a span from the local queue, but may fail if it's empty, +// returning false. +func (q *spanQueue) tryGetFast() objptr { + if q.tail-q.head == 0 { + return 0 + } + s := q.ring[q.head%uint32(len(q.ring))] + q.head++ + return s } -// get attempts to take a span off the queue. Might fail if the -// queue is empty. May be called by multiple threads, but callers -// are better off using stealFrom to amortize the cost of stealing. -// This method is intended for use by the owner of this queue. -func (q *localSpanQueue) get() objptr { +// steal takes some spans from the ring chain of another span queue. +// +// q == q2 is OK. 
+func (q *spanQueue) steal(q2 *spanQueue) objptr { + r := (*spanSPMC)(q2.chain.tail.Load()) + if r == nil { + return 0 + } for { - h := q.head.Load() - t := q.tail.Load() - if t == h { + // It's important that we load the next pointer + // *before* popping the tail. In general, r may be + // transiently empty, but if next is non-nil before + // the pop and the pop fails, then r is permanently + // empty, which is the only condition under which it's + // safe to drop r from the chain. + r2 := (*spanSPMC)(r.prev.Load()) + + // Try to refill from one of the rings + if s := q.refill(r); s != 0 { + return s + } + + if r2 == nil { + // This is the only ring. It's empty right + // now, but could be pushed to in the future. return 0 } - s := q.ring[h%uint32(len(q.ring))] - if q.head.CompareAndSwap(h, h+1) { - return s + + // The tail of the chain has been drained, so move on + // to the next ring. Try to drop it from the chain + // so the next consumer doesn't have to look at the empty + // ring again. + if q2.chain.tail.CompareAndSwapNoWB(unsafe.Pointer(r), unsafe.Pointer(r2)) { + r.dead.Store(true) } - } -} -func (q *localSpanQueue) empty() bool { - h := q.head.Load() - t := q.tail.Load() - return t == h + r = r2 + } } -// stealFrom takes spans from q2 and puts them into q1. One span is removed -// from the stolen spans and returned on success. Failure to steal returns a -// zero objptr. -func (q1 *localSpanQueue) stealFrom(q2 *localSpanQueue) objptr { - writeHead := q1.tail.Load() +// refill takes some spans from r and puts them into q's local queue. +// +// One span is removed from the stolen spans and returned on success. +// Failure to steal returns a zero objptr. +// +// steal is thread-safe with respect to r. +func (q *spanQueue) refill(r *spanSPMC) objptr { + if q.tail-q.head != 0 { + throw("steal with local work available") + } + // Steal some spans. 
var n uint32 for { - h := q2.head.Load() // load-acquire, synchronize with other consumers - t := q2.tail.Load() // load-acquire, synchronize with the producer + h := r.head.Load() // load-acquire, synchronize with other consumers + t := r.tail.Load() // load-acquire, synchronize with the producer n = t - h n = n - n/2 if n == 0 { return 0 } - if n > uint32(len(q2.ring)/2) { // read inconsistent h and t + if n > r.cap { // read inconsistent h and t continue } + n = min(n, uint32(len(q.ring)/2)) for i := uint32(0); i < n; i++ { - c := q2.ring[(h+i)%uint32(len(q2.ring))] - q1.ring[(writeHead+i)%uint32(len(q1.ring))] = c + q.ring[i] = *r.slot(h + i) } - if q2.head.CompareAndSwap(h, h+n) { + if r.head.CompareAndSwap(h, h+n) { break } } - n-- - c := q1.ring[(writeHead+n)%uint32(len(q1.ring))] - if n == 0 { - return c + + // Update local queue head and tail to reflect new buffered values. + q.head = 0 + q.tail = n + + // Pop off the head of the queue and return it. + return q.tryGetFast() +} + +// spanSPMC is a ring buffer of objptrs that represent spans. +// Accessed without a lock. +// +// Single-producer, multi-consumer. The only producer is the P that owns this +// queue, but any other P may consume from it. +// +// ## Invariants for memory management +// +// 1. All spanSPMCs are allocated from mheap_.spanSPMCAlloc. +// 2. All allocated spanSPMCs must be on the work.spanSPMCs list. +// 3. spanSPMCs may only be allocated if gcphase != _GCoff. +// 4. spanSPMCs may only be deallocated if gcphase == _GCoff. +// +// Invariants (3) and (4) ensure that we do not need to concern ourselves with +// tricky reuse issues that stem from not knowing when a thread is truly done +// with a spanSPMC. For example, two threads could load the same spanSPMC from +// the tail of the chain. One thread is then paused while the other steals the +// last few elements off of it. 
It's not safe to free at that point since the +// other thread will still inspect that spanSPMC, and we have no way of knowing +// without more complex and/or heavyweight synchronization. +// +// Instead, we rely on the global synchronization inherent to GC phases, and +// the fact that spanSPMCs are only ever used during the mark phase, to ensure +// memory safety. This means we temporarily waste some memory, but it's only +// until the end of the mark phase. +type spanSPMC struct { + _ sys.NotInHeap + + // allnext is the link to the next spanSPMC on the work.spanSPMCs list. + // This is used to find and free dead spanSPMCs. Protected by + // work.spanSPMCs.lock. + allnext *spanSPMC + + // dead indicates whether the spanSPMC is no longer in use. + // Protected by the CAS to the prev field of the spanSPMC pointing + // to this spanSPMC. That is, whoever wins that CAS takes ownership + // of marking this spanSPMC as dead. See spanQueue.steal for details. + dead atomic.Bool + + // prev is the next link up a spanQueue's SPMC chain, from tail to head, + // hence the name "prev." Set by a spanQueue's producer, cleared by a + // CAS in spanQueue.steal. + prev atomic.UnsafePointer // *spanSPMC + + // head, tail, cap, and ring together represent a fixed-size SPMC lock-free + // ring buffer of size cap. The ring buffer contains objptr values. + head atomic.Uint32 + tail atomic.Uint32 + cap uint32 // cap(ring)) + ring *objptr +} + +// newSpanSPMC allocates and initializes a new spmc with the provided capacity. +// +// newSpanSPMC may override the capacity with a larger one if the provided one would +// waste memory. +func newSpanSPMC(cap uint32) *spanSPMC { + lock(&work.spanSPMCs.lock) + r := (*spanSPMC)(mheap_.spanSPMCAlloc.alloc()) + r.allnext = work.spanSPMCs.all + work.spanSPMCs.all = r + unlock(&work.spanSPMCs.lock) + + // If cap < the capacity of a single physical page, round up. 
+ pageCap := uint32(physPageSize / goarch.PtrSize) // capacity of a single page + if cap < pageCap { + cap = pageCap } - h := q1.head.Load() - if writeHead-h+n >= uint32(len(q1.ring)) { - throw("localSpanQueue.stealFrom: queue overflow") + if cap&(cap-1) != 0 { + throw("spmc capacity must be a power of 2") } - q1.tail.Store(writeHead + n) - return c + + r.cap = cap + ring := sysAlloc(uintptr(cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys, "GC span queue") + atomic.StorepNoWB(unsafe.Pointer(&r.ring), ring) + return r } -// drain moves all spans in the queue to the global queue. +// empty returns true if the spmc is empty. // -// Returns true if anything was moved. -func (q *localSpanQueue) drain() bool { - var batch [len(q.ring)]objptr +// empty is thread-safe. +func (r *spanSPMC) empty() bool { + h := r.head.Load() + t := r.tail.Load() + return t == h +} - var n uint32 - for { - var h uint32 - for { - h = q.head.Load() - t := q.tail.Load() - n = t - h - if n == 0 { - return false - } - if n <= uint32(len(q.ring)) { - break - } - // Read inconsistent h and t. - } - for i := uint32(0); i < n; i++ { - batch[i] = q.ring[(h+i)%uint32(len(q.ring))] - } - if q.head.CompareAndSwap(h, h+n) { // Commits consume. +// deinit frees any resources the spanSPMC is holding onto and zeroes it. +func (r *spanSPMC) deinit() { + sysFree(unsafe.Pointer(r.ring), uintptr(r.cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys) + r.ring = nil + r.dead.Store(false) + r.prev.StoreNoWB(nil) + r.head.Store(0) + r.tail.Store(0) + r.cap = 0 +} + +// slot returns a pointer to slot i%r.cap. +func (r *spanSPMC) slot(i uint32) *objptr { + idx := uintptr(i & (r.cap - 1)) + return (*objptr)(unsafe.Add(unsafe.Pointer(r.ring), idx*unsafe.Sizeof(objptr(0)))) +} + +// freeSomeSpanSPMCs frees some spanSPMCs back to the OS and returns +// true if it should be called again to free more. 
+func freeSomeSpanSPMCs(preemptible bool) bool { + // TODO(mknyszek): This is arbitrary, but some kind of limit is necessary + // to help bound delays to cooperatively preempt ourselves. + const batchSize = 64 + + // According to the SPMC memory management invariants, we can only free + // spanSPMCs outside of the mark phase. We ensure we do this in two ways. + // + // 1. We take the work.spanSPMCs lock, which we need anyway. This ensures + // that we are non-preemptible. If this path becomes lock-free, we will + // need to become non-preemptible in some other way. + // 2. Once we are non-preemptible, we check the gcphase, and back out if + // it's not safe. + // + // This way, we ensure that we don't start freeing if we're in the wrong + // phase, and the phase can't change on us while we're freeing. + lock(&work.spanSPMCs.lock) + if gcphase != _GCoff || work.spanSPMCs.all == nil { + unlock(&work.spanSPMCs.lock) + return false + } + rp := &work.spanSPMCs.all + gp := getg() + more := true + for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ { + r := *rp + if r == nil { + more = false break } + if r.dead.Load() { + // It's dead. Deinitialize and free it. + *rp = r.allnext + r.deinit() + mheap_.spanSPMCAlloc.free(unsafe.Pointer(r)) + } else { + // Still alive, likely in some P's chain. + // Skip it. + rp = &r.allnext + } } - if !q.empty() { - throw("drained local span queue, but not empty") - } - - work.spanq.putBatch(batch[:n]) - return true + unlock(&work.spanSPMCs.lock) + return more } -// spanQueueSteal attempts to steal a span from another P's local queue. +// tryStealSpan attempts to steal a span from another P's local queue. // // Returns a non-zero objptr on success. 
-func spanQueueSteal(gcw *gcWork) objptr { +func (w *gcWork) tryStealSpan() objptr { pp := getg().m.p.ptr() for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() { + if !work.spanqMask.read(enum.position()) { + continue + } p2 := allp[enum.position()] if pp == p2 { continue } - if s := gcw.spanq.stealFrom(&p2.gcw.spanq); s != 0 { + if s := w.spanq.steal(&p2.gcw.spanq); s != 0 { return s } + // N.B. This is intentionally racy. We may stomp on a mask set by + // a P that just put a bunch of work into its local queue. + // + // This is OK because the ragged barrier in gcMarkDone will set + // the bit on each P if there's local work we missed. This race + // should generally be rare, since the window between noticing + // an empty local queue and this bit being set is quite small. + work.spanqMask.clear(int32(enum.position())) } return 0 } @@ -903,6 +1135,23 @@ func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) { clear(w.stats[:]) } +// gcMarkWorkAvailable reports whether there's any non-local work available to do. +// +// This is a heavyweight check and must only be used for correctness, not +// as a hint. +func gcMarkWorkAvailable() bool { + if !work.full.empty() { + return true // global work available + } + if work.markrootNext < work.markrootJobs { + return true // root scan work available + } + if work.spanqMask.any() { + return true // stealable local work available + } + return false +} + // scanObject scans the object starting at b, adding pointers to gcw. // b must point to the beginning of a heap object or an oblet. 
// scanObject consults the GC bitmap for the pointer mask and the diff --git a/src/runtime/mgcmark_nogreenteagc.go b/src/runtime/mgcmark_nogreenteagc.go index 6375773123..e450503291 100644 --- a/src/runtime/mgcmark_nogreenteagc.go +++ b/src/runtime/mgcmark_nogreenteagc.go @@ -54,31 +54,34 @@ func (q *spanInlineMarkBits) tryAcquire() bool { } type spanQueue struct { - _ uint32 // To match alignment padding requirements for atomically-accessed variables in workType. } -func (q *spanQueue) empty() bool { - return true +func (q *spanQueue) flush() { } -func (q *spanQueue) size() int { - return 0 +func (q *spanQueue) empty() bool { + return true } -type localSpanQueue struct { +type spanSPMC struct { + _ sys.NotInHeap } -func (q *localSpanQueue) drain() bool { +func freeSomeSpanSPMCs(preemptible bool) bool { return false } -func (q *localSpanQueue) empty() bool { - return true +type objptr uintptr + +func (w *gcWork) tryGetSpanFast() objptr { + return 0 } -type objptr uintptr +func (w *gcWork) tryGetSpan() objptr { + return 0 +} -func (w *gcWork) tryGetSpan(steal bool) objptr { +func (w *gcWork) tryStealSpan() objptr { return 0 } @@ -116,6 +119,17 @@ func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) { clear(w.stats[:]) } +// gcMarkWorkAvailable reports whether there's any non-local work available to do. +func gcMarkWorkAvailable() bool { + if !work.full.empty() { + return true // global work available + } + if work.markrootNext < work.markrootJobs { + return true // root scan work available + } + return false +} + // scanObject scans the object starting at b, adding pointers to gcw. // b must point to the beginning of a heap object or an oblet. 
// scanObject consults the GC bitmap for the pointer mask and the diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index f637ba96b6..17e2f405e4 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -714,7 +714,7 @@ func (c *gcControllerState) enlistWorker() { // (the scheduler will already prefer to spin up a new // dedicated worker over an idle one). if sched.npidle.Load() != 0 && sched.nmspinning.Load() == 0 { - wakep() + wakep() // Likely to consume our worker request. return } } diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index b72cc461ba..364cdb58cc 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -307,6 +307,10 @@ func bgsweep(c chan int) { // N.B. freeSomeWbufs is already batched internally. goschedIfBusy() } + for freeSomeSpanSPMCs(true) { + // N.B. freeSomeSpanSPMCs is already batched internally. + goschedIfBusy() + } lock(&sweep.lock) if !isSweepDone() { // This can happen if a GC runs between diff --git a/src/runtime/mgcwork.go b/src/runtime/mgcwork.go index ee7eec9ef7..48ac94eecd 100644 --- a/src/runtime/mgcwork.go +++ b/src/runtime/mgcwork.go @@ -55,9 +55,10 @@ func init() { // | Priority | Work queue | Restrictions | Function | // |----------------------------------------------------------| // | 1 | Workbufs | P-local | tryGetObjFast | -// | 2 | Span queue | P-local | tryGetSpan(false) | [greenteagc] +// | 2 | Span queue | P-local | tryGetSpanFast | [greenteagc] // | 3 | Workbufs | None | tryGetObj | -// | 4 | Span queue | None | tryGetSpan(true) | [greenteagc] +// | 4 | Span queue | None | tryGetSpan | [greenteagc] +// | 5 | Span queue | None | tryStealSpan | [greenteagc] // +----------------------------------------------------------+ // // The rationale behind this ordering comes from two insights: @@ -80,6 +81,8 @@ func init() { // gcWork may locally hold GC work buffers. This can be done by // disabling preemption (systemstack or acquirem). 
type gcWork struct { + id int32 // same ID as the parent P + // wbuf1 and wbuf2 are the primary and secondary work buffers. // // This can be thought of as a stack of both work buffers' @@ -103,7 +106,7 @@ type gcWork struct { // spanq is a queue of spans to process. // // Only used if goexperiment.GreenTeaGC. - spanq localSpanQueue + spanq spanQueue // ptrBuf is a temporary buffer used by span scanning. ptrBuf *[pageSize / goarch.PtrSize]uintptr @@ -318,7 +321,18 @@ func (w *gcWork) dispose() { } w.wbuf2 = nil } - if w.spanq.drain() { + if !w.spanq.empty() { + w.spanq.flush() // Flush any local work. + + // There's globally-visible work now, so make everyone aware of it. + // + // Note that we need to make everyone aware even if flush didn't + // flush any local work. The global work was always visible, but + // the bitmap bit may have been unset. + // + // See the comment in tryStealSpan, which explains how it relies + // on this behavior. + work.spanqMask.set(w.id) w.flushedWork = true } if w.bytesMarked != 0 { diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 1776206573..049b7798a8 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -213,13 +213,14 @@ type mheap struct { pad [(cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize) % cpu.CacheLinePadSize]byte } - spanalloc fixalloc // allocator for span* - cachealloc fixalloc // allocator for mcache* - specialfinalizeralloc fixalloc // allocator for specialfinalizer* - specialCleanupAlloc fixalloc // allocator for specialCleanup* - specialCheckFinalizerAlloc fixalloc // allocator for specialCheckFinalizer* - specialTinyBlockAlloc fixalloc // allocator for specialTinyBlock* - specialprofilealloc fixalloc // allocator for specialprofile* + spanalloc fixalloc // allocator for span + spanSPMCAlloc fixalloc // allocator for spanSPMC, protected by work.spanSPMCs.lock + cachealloc fixalloc // allocator for mcache + specialfinalizeralloc fixalloc // allocator for specialfinalizer 
+ specialCleanupAlloc fixalloc // allocator for specialCleanup + specialCheckFinalizerAlloc fixalloc // allocator for specialCheckFinalizer + specialTinyBlockAlloc fixalloc // allocator for specialTinyBlock + specialprofilealloc fixalloc // allocator for specialprofile specialReachableAlloc fixalloc // allocator for specialReachable specialPinCounterAlloc fixalloc // allocator for specialPinCounter specialWeakHandleAlloc fixalloc // allocator for specialWeakHandle @@ -793,6 +794,7 @@ func (h *mheap) init() { lockInit(&h.speciallock, lockRankMheapSpecial) h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys) + h.spanSPMCAlloc.init(unsafe.Sizeof(spanSPMC{}), nil, nil, &memstats.gcMiscSys) h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys) h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys) h.specialCleanupAlloc.init(unsafe.Sizeof(specialCleanup{}), nil, nil, &memstats.other_sys) @@ -1937,86 +1939,6 @@ func (list *mSpanList) takeAll(other *mSpanList) { other.first, other.last = nil, nil } -// mSpanQueue is like an mSpanList but is FIFO instead of LIFO and may -// be allocated on the stack. (mSpanList can be visible from the mspan -// itself, so it is marked as not-in-heap). -type mSpanQueue struct { - head, tail *mspan - n int -} - -// push adds s to the end of the queue. -func (q *mSpanQueue) push(s *mspan) { - if s.next != nil { - throw("span already on list") - } - if q.tail == nil { - q.tail, q.head = s, s - } else { - q.tail.next = s - q.tail = s - } - q.n++ -} - -// pop removes a span from the head of the queue, if any. -func (q *mSpanQueue) pop() *mspan { - if q.head == nil { - return nil - } - s := q.head - q.head = s.next - s.next = nil - if q.head == nil { - q.tail = nil - } - q.n-- - return s -} - -// takeAll removes all the spans from q2 and adds them to the end of q1, in order. 
-func (q1 *mSpanQueue) takeAll(q2 *mSpanQueue) { - if q2.head == nil { - return - } - if q1.head == nil { - *q1 = *q2 - } else { - q1.tail.next = q2.head - q1.tail = q2.tail - q1.n += q2.n - } - q2.tail = nil - q2.head = nil - q2.n = 0 -} - -// popN removes n spans from the head of the queue and returns them as a new queue. -func (q *mSpanQueue) popN(n int) mSpanQueue { - var newQ mSpanQueue - if n <= 0 { - return newQ - } - if n >= q.n { - newQ = *q - q.tail = nil - q.head = nil - q.n = 0 - return newQ - } - s := q.head - for range n - 1 { - s = s.next - } - q.n -= n - newQ.head = q.head - newQ.tail = s - newQ.n = n - q.head = s.next - s.next = nil - return newQ -} - const ( // _KindSpecialTinyBlock indicates that a given allocation is a tiny block. // Ordered before KindSpecialFinalizer and KindSpecialCleanup so that it diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 4154dcd76e..2c42cad6c1 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -5735,6 +5735,7 @@ func setcpuprofilerate(hz int32) { // previously destroyed p, and transitions it to status _Pgcstop. func (pp *p) init(id int32) { pp.id = id + pp.gcw.id = id pp.status = _Pgcstop pp.sudogcache = pp.sudogbuf[:0] pp.deferpool = pp.deferpoolbuf[:0] @@ -5890,6 +5891,7 @@ func procresize(nprocs int32) *p { idlepMask = idlepMask.resize(nprocs) timerpMask = timerpMask.resize(nprocs) + work.spanqMask = work.spanqMask.resize(nprocs) unlock(&allpLock) } @@ -5954,6 +5956,7 @@ func procresize(nprocs int32) *p { allp = allp[:nprocs] idlepMask = idlepMask.resize(nprocs) timerpMask = timerpMask.resize(nprocs) + work.spanqMask = work.spanqMask.resize(nprocs) unlock(&allpLock) } @@ -6892,6 +6895,16 @@ func (p pMask) clear(id int32) { atomic.And(&p[word], ^mask) } +// any returns true if any bit in p is set. +func (p pMask) any() bool { + for i := range p { + if atomic.Load(&p[i]) != 0 { + return true + } + } + return false +} + // resize resizes the pMask and returns a new one. 
// // The result may alias p, so callers are encouraged to -- cgit v1.3-5-g9baa From 411c250d64304033181c46413a6e9381e8fe9b82 Mon Sep 17 00:00:00 2001 From: Michael Matloob Date: Mon, 17 Mar 2025 11:45:52 -0400 Subject: runtime: add specialized malloc functions for sizes up to 512 bytes This CL adds a generator function in runtime/_mkmalloc to generate specialized mallocgc functions for sizes up throuht 512 bytes. (That's the limit where it's possible to end up in the no header case when there are scan bits, and where the benefits of the specialized functions significantly diminish according to microbenchmarks). If the specializedmalloc GOEXPERIMENT is turned on, mallocgc will call one of these functions in the no header case. malloc_generated.go is the generated file containing the specialized malloc functions. malloc_stubs.go contains the templates that will be stamped to create the specialized malloc functions. malloc_tables_generated contains the tables that mallocgc will use to select the specialized function to call. I've had to update the two stdlib_test.go files to account for the new submodule mkmalloc is in. mprof_test accounts for the changes in the stacks since different functions can be called in some cases. I still need to investigate heapsampling.go. 
Change-Id: Ia0f68dccdf1c6a200554ae88657cf4d686ace819 Reviewed-on: https://go-review.googlesource.com/c/go/+/665835 Reviewed-by: Michael Knyszek Reviewed-by: Michael Matloob LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/types2/stdlib_test.go | 1 + src/go/types/stdlib_test.go | 1 + src/internal/runtime/gc/sizeclasses.go | 2 + src/runtime/_mkmalloc/constants.go | 29 + src/runtime/_mkmalloc/go.mod | 5 + src/runtime/_mkmalloc/go.sum | 2 + src/runtime/_mkmalloc/mkmalloc.go | 605 ++ src/runtime/_mkmalloc/mkmalloc_test.go | 36 + src/runtime/_mkmalloc/mksizeclasses.go | 59 +- src/runtime/malloc.go | 63 +- src/runtime/malloc_generated.go | 8468 ++++++++++++++++++++++++ src/runtime/malloc_stubs.go | 586 ++ src/runtime/malloc_tables_generated.go | 1038 +++ src/runtime/malloc_tables_plan9.go | 14 + src/runtime/malloc_test.go | 10 + 15 files changed, 10860 insertions(+), 59 deletions(-) create mode 100644 src/runtime/_mkmalloc/constants.go create mode 100644 src/runtime/_mkmalloc/go.mod create mode 100644 src/runtime/_mkmalloc/go.sum create mode 100644 src/runtime/_mkmalloc/mkmalloc.go create mode 100644 src/runtime/_mkmalloc/mkmalloc_test.go create mode 100644 src/runtime/malloc_generated.go create mode 100644 src/runtime/malloc_stubs.go create mode 100644 src/runtime/malloc_tables_generated.go create mode 100644 src/runtime/malloc_tables_plan9.go (limited to 'src/runtime') diff --git a/src/cmd/compile/internal/types2/stdlib_test.go b/src/cmd/compile/internal/types2/stdlib_test.go index 365bc97462..a579c8184e 100644 --- a/src/cmd/compile/internal/types2/stdlib_test.go +++ b/src/cmd/compile/internal/types2/stdlib_test.go @@ -360,6 +360,7 @@ func TestStdKen(t *testing.T) { var excluded = map[string]bool{ "builtin": true, "cmd/compile/internal/ssa/_gen": true, + "runtime/_mkmalloc": true, } // printPackageMu synchronizes the printing of type-checked package files in diff --git a/src/go/types/stdlib_test.go b/src/go/types/stdlib_test.go index 79ccbc6fcf..eb838b2c88 
100644 --- a/src/go/types/stdlib_test.go +++ b/src/go/types/stdlib_test.go @@ -362,6 +362,7 @@ func TestStdKen(t *testing.T) { var excluded = map[string]bool{ "builtin": true, "cmd/compile/internal/ssa/_gen": true, + "runtime/_mkmalloc": true, } // printPackageMu synchronizes the printing of type-checked package files in diff --git a/src/internal/runtime/gc/sizeclasses.go b/src/internal/runtime/gc/sizeclasses.go index 3ef13834e4..befba425cc 100644 --- a/src/internal/runtime/gc/sizeclasses.go +++ b/src/internal/runtime/gc/sizeclasses.go @@ -91,6 +91,8 @@ const ( PageShift = 13 MaxObjsPerSpan = 1024 MaxSizeClassNPages = 10 + TinySize = 16 + TinySizeClass = 2 ) var SizeClassToSize = [NumSizeClasses]uint16{0, 8, 16, 24, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768} diff --git a/src/runtime/_mkmalloc/constants.go b/src/runtime/_mkmalloc/constants.go new file mode 100644 index 0000000000..ad20c7b52b --- /dev/null +++ b/src/runtime/_mkmalloc/constants.go @@ -0,0 +1,29 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +const ( + // Constants that we use and will transfer to the runtime. + minHeapAlign = 8 + maxSmallSize = 32 << 10 + smallSizeDiv = 8 + smallSizeMax = 1024 + largeSizeDiv = 128 + pageShift = 13 + tinySize = 16 + + // Derived constants. + pageSize = 1 << pageShift +) + +const ( + maxPtrSize = max(4, 8) + maxPtrBits = 8 * maxPtrSize + + // Maximum size smallScanNoHeader would be called for, which is the + // maximum value gc.MinSizeForMallocHeader can have on any platform. 
+ // gc.MinSizeForMallocHeader is defined as goarch.PtrSize * goarch.PtrBits. + smallScanNoHeaderMax = maxPtrSize * maxPtrBits +) diff --git a/src/runtime/_mkmalloc/go.mod b/src/runtime/_mkmalloc/go.mod new file mode 100644 index 0000000000..623c341769 --- /dev/null +++ b/src/runtime/_mkmalloc/go.mod @@ -0,0 +1,5 @@ +module runtime/_mkmalloc + +go 1.24 + +require golang.org/x/tools v0.33.0 diff --git a/src/runtime/_mkmalloc/go.sum b/src/runtime/_mkmalloc/go.sum new file mode 100644 index 0000000000..bead5223ca --- /dev/null +++ b/src/runtime/_mkmalloc/go.sum @@ -0,0 +1,2 @@ +golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= +golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= diff --git a/src/runtime/_mkmalloc/mkmalloc.go b/src/runtime/_mkmalloc/mkmalloc.go new file mode 100644 index 0000000000..986b0aa9f8 --- /dev/null +++ b/src/runtime/_mkmalloc/mkmalloc.go @@ -0,0 +1,605 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "bytes" + "flag" + "fmt" + "go/ast" + "go/format" + "go/parser" + "go/token" + "log" + "os" + "strings" + + "golang.org/x/tools/go/ast/astutil" + + internalastutil "runtime/_mkmalloc/astutil" +) + +var stdout = flag.Bool("stdout", false, "write sizeclasses source to stdout instead of sizeclasses.go") + +func makeSizeToSizeClass(classes []class) []uint8 { + sc := uint8(0) + ret := make([]uint8, smallScanNoHeaderMax+1) + for i := range ret { + if i > classes[sc].size { + sc++ + } + ret[i] = sc + } + return ret +} + +func main() { + log.SetFlags(0) + log.SetPrefix("mkmalloc: ") + + classes := makeClasses() + sizeToSizeClass := makeSizeToSizeClass(classes) + + if *stdout { + if _, err := os.Stdout.Write(mustFormat(generateSizeClasses(classes))); err != nil { + log.Fatal(err) + } + return + } + + sizeclasesesfile := "../../internal/runtime/gc/sizeclasses.go" + if err := os.WriteFile(sizeclasesesfile, mustFormat(generateSizeClasses(classes)), 0666); err != nil { + log.Fatal(err) + } + + outfile := "../malloc_generated.go" + if err := os.WriteFile(outfile, mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass))), 0666); err != nil { + log.Fatal(err) + } + + tablefile := "../malloc_tables_generated.go" + if err := os.WriteFile(tablefile, mustFormat(generateTable(sizeToSizeClass)), 0666); err != nil { + log.Fatal(err) + } +} + +// withLineNumbers returns b with line numbers added to help debugging. +func withLineNumbers(b []byte) []byte { + var buf bytes.Buffer + i := 1 + for line := range bytes.Lines(b) { + fmt.Fprintf(&buf, "%d: %s", i, line) + i++ + } + return buf.Bytes() +} + +// mustFormat formats the input source, or exits if there's an error. +func mustFormat(b []byte) []byte { + formatted, err := format.Source(b) + if err != nil { + log.Fatalf("error formatting source: %v\nsource:\n%s\n", err, withLineNumbers(b)) + } + return formatted +} + +// generatorConfig is the configuration for the generator. 
It uses the given file to find +// its templates, and generates each of the functions specified by specs. +type generatorConfig struct { + file string + specs []spec +} + +// spec is the specification for a function for the inliner to produce. The function gets +// the given name, and is produced by starting with the function with the name given by +// templateFunc and applying each of the ops. +type spec struct { + name string + templateFunc string + ops []op +} + +// replacementKind specifies the operation to ben done by a op. +type replacementKind int + +const ( + inlineFunc = replacementKind(iota) + subBasicLit +) + +// op is a single inlining operation for the inliner. Any calls to the function +// from are replaced with the inlined body of to. For non-functions, uses of from are +// replaced with the basic literal expression given by to. +type op struct { + kind replacementKind + from string + to string +} + +func smallScanNoHeaderSCFuncName(sc, scMax uint8) string { + if sc == 0 || sc > scMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", sc) +} + +func tinyFuncName(size uintptr) string { + if size == 0 || size > smallScanNoHeaderMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocTiny%d", size) +} + +func smallNoScanSCFuncName(sc, scMax uint8) string { + if sc < 2 || sc > scMax { + return "mallocPanic" + } + return fmt.Sprintf("mallocgcSmallNoScanSC%d", sc) +} + +// specializedMallocConfig produces an inlining config to stamp out the definitions of the size-specialized +// malloc functions to be written by mkmalloc. +func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generatorConfig { + config := generatorConfig{file: "../malloc_stubs.go"} + + // Only generate specialized functions for sizes that don't have + // a header on 64-bit platforms. 
(They may have a header on 32-bit, but + // we will fall back to the non-specialized versions in that case) + scMax := sizeToSizeClass[smallScanNoHeaderMax] + + str := fmt.Sprint + + // allocations with pointer bits + { + const noscan = 0 + for sc := uint8(0); sc <= scMax; sc++ { + if sc == 0 { + continue + } + name := smallScanNoHeaderSCFuncName(sc, scMax) + elemsize := classes[sc].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "smallScanNoHeaderStub"}, + {inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"}, + {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"}, + {inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(sc)}, + {subBasicLit, "noscanint_", str(noscan)}, + }, + }) + } + } + + // allocations without pointer bits + { + const noscan = 1 + + // tiny + tinySizeClass := sizeToSizeClass[tinySize] + for s := range uintptr(16) { + if s == 0 { + continue + } + name := tinyFuncName(s) + elemsize := classes[tinySizeClass].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "tinyStub"}, + {inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(tinySizeClass)}, + {subBasicLit, "size_", str(s)}, + {subBasicLit, "noscanint_", str(noscan)}, + }, + }) + } + + // non-tiny + for sc := uint8(tinySizeClass); sc <= scMax; sc++ { + name := smallNoScanSCFuncName(sc, scMax) + elemsize := classes[sc].size + config.specs = append(config.specs, spec{ + templateFunc: "mallocStub", + name: name, + ops: []op{ + {inlineFunc, "inlinedMalloc", "smallNoScanStub"}, + {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"}, + {subBasicLit, "elemsize_", str(elemsize)}, + {subBasicLit, "sizeclass_", str(sc)}, + {subBasicLit, "noscanint_", 
str(noscan)}, + }, + }) + } + } + + return config +} + +// inline applies the inlining operations given by the config. +func inline(config generatorConfig) []byte { + var out bytes.Buffer + + // Read the template file in. + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, config.file, nil, 0) + if err != nil { + log.Fatalf("parsing %s: %v", config.file, err) + } + + // Collect the function and import declarations. The function + // declarations in the template file provide both the templates + // that will be stamped out, and the functions that will be inlined + // into them. The imports from the template file will be copied + // straight to the output. + funcDecls := map[string]*ast.FuncDecl{} + importDecls := []*ast.GenDecl{} + for _, decl := range f.Decls { + switch decl := decl.(type) { + case *ast.FuncDecl: + funcDecls[decl.Name.Name] = decl + case *ast.GenDecl: + if decl.Tok.String() == "import" { + importDecls = append(importDecls, decl) + continue + } + } + } + + // Write out the package and import declarations. + out.WriteString("// Code generated by mkmalloc.go; DO NOT EDIT.\n\n") + out.WriteString("package " + f.Name.Name + "\n\n") + for _, importDecl := range importDecls { + out.Write(mustFormatNode(fset, importDecl)) + out.WriteString("\n\n") + } + + // Produce each of the inlined functions specified by specs. + for _, spec := range config.specs { + // Start with a renamed copy of the template function. 
+ containingFuncCopy := internalastutil.CloneNode(funcDecls[spec.templateFunc]) + if containingFuncCopy == nil { + log.Fatal("did not find", spec.templateFunc) + } + containingFuncCopy.Name.Name = spec.name + + // Apply each of the ops given by the specs + stamped := ast.Node(containingFuncCopy) + for _, repl := range spec.ops { + if toDecl, ok := funcDecls[repl.to]; ok { + stamped = inlineFunction(stamped, repl.from, toDecl) + } else { + stamped = substituteWithBasicLit(stamped, repl.from, repl.to) + } + } + + out.Write(mustFormatNode(fset, stamped)) + out.WriteString("\n\n") + } + + return out.Bytes() +} + +// substituteWithBasicLit recursively renames identifiers in the provided AST +// according to 'from' and 'to'. +func substituteWithBasicLit(node ast.Node, from, to string) ast.Node { + // The op is a substitution of an identifier with an basic literal. + toExpr, err := parser.ParseExpr(to) + if err != nil { + log.Fatalf("parsing expr %q: %v", to, err) + } + if _, ok := toExpr.(*ast.BasicLit); !ok { + log.Fatalf("op 'to' expr %q is not a basic literal", to) + } + return astutil.Apply(node, func(cursor *astutil.Cursor) bool { + if isIdentWithName(cursor.Node(), from) { + cursor.Replace(toExpr) + } + return true + }, nil) +} + +// inlineFunction recursively replaces calls to the function 'from' with the body of the function +// 'toDecl'. All calls to 'from' must appear in assignment statements. +// The replacement is very simple: it doesn't substitute the arguments for the parameters, so the +// arguments to the function call must be the same identifier as the parameters to the function +// declared by 'toDecl'. If there are any calls to from where that's not the case there will be a fatal error. 
+func inlineFunction(node ast.Node, from string, toDecl *ast.FuncDecl) ast.Node { + return astutil.Apply(node, func(cursor *astutil.Cursor) bool { + switch node := cursor.Node().(type) { + case *ast.AssignStmt: + // TODO(matloob) CHECK function args have same name + // as parameters (or parameter is "_"). + if len(node.Rhs) == 1 && isCallTo(node.Rhs[0], from) { + args := node.Rhs[0].(*ast.CallExpr).Args + if !argsMatchParameters(args, toDecl.Type.Params) { + log.Fatalf("applying op: arguments to %v don't match parameter names of %v: %v", from, toDecl.Name, debugPrint(args...)) + } + replaceAssignment(cursor, node, toDecl) + } + return false + case *ast.CallExpr: + // double check that all calls to from appear within an assignment + if isCallTo(node, from) { + if _, ok := cursor.Parent().(*ast.AssignStmt); !ok { + log.Fatalf("applying op: all calls to function %q being replaced must appear in an assignment statement, appears in %T", from, cursor.Parent()) + } + } + } + return true + }, nil) +} + +// argsMatchParameters reports whether the arguments given by args are all identifiers +// whose names are the same as the corresponding parameters in params. +func argsMatchParameters(args []ast.Expr, params *ast.FieldList) bool { + var paramIdents []*ast.Ident + for _, f := range params.List { + paramIdents = append(paramIdents, f.Names...) + } + + if len(args) != len(paramIdents) { + return false + } + + for i := range args { + if !isIdentWithName(args[i], paramIdents[i].Name) { + return false + } + } + + return true +} + +// isIdentWithName reports whether the expression is an identifier with the given name. +func isIdentWithName(expr ast.Node, name string) bool { + ident, ok := expr.(*ast.Ident) + if !ok { + return false + } + return ident.Name == name +} + +// isCallTo reports whether the expression is a call expression to the function with the given name. 
+func isCallTo(expr ast.Expr, name string) bool { + callexpr, ok := expr.(*ast.CallExpr) + if !ok { + return false + } + return isIdentWithName(callexpr.Fun, name) +} + +// replaceAssignment replaces an assignment statement where the right hand side is a function call +// whose arguments have the same names as the parameters to funcdecl with the body of funcdecl. +// It sets the left hand side of the assignment to the return values of the function. +func replaceAssignment(cursor *astutil.Cursor, assign *ast.AssignStmt, funcdecl *ast.FuncDecl) { + if !hasTerminatingReturn(funcdecl.Body) { + log.Fatal("function being inlined must have a return at the end") + } + + body := internalastutil.CloneNode(funcdecl.Body) + if hasTerminatingAndNonterminatingReturn(funcdecl.Body) { + // The function has multiple return points. Add the code that we'd continue with in the caller + // after each of the return points. The calling function must have a terminating return + // so we don't continue execution in the replaced function after we finish executing the + // continue block that we add. + body = addContinues(cursor, assign, body, everythingFollowingInParent(cursor)).(*ast.BlockStmt) + } + + if len(body.List) < 1 { + log.Fatal("replacing with empty bodied function") + } + + // The op happens in two steps: first we insert the body of the function being inlined (except for + // the final return) before the assignment, and then we change the assignment statement to replace the function call + // with the expressions being returned. + + // Determine the expressions being returned. + beforeReturn, ret := body.List[:len(body.List)-1], body.List[len(body.List)-1] + returnStmt, ok := ret.(*ast.ReturnStmt) + if !ok { + log.Fatal("last stmt in function we're replacing with should be a return") + } + results := returnStmt.Results + + // Insert the body up to the final return. + for _, stmt := range beforeReturn { + cursor.InsertBefore(stmt) + } + + // Rewrite the assignment statement. 
+ replaceWithAssignment(cursor, assign.Lhs, results, assign.Tok) +} + +// hasTerminatingReturn reparts whether the block ends in a return statement. +func hasTerminatingReturn(block *ast.BlockStmt) bool { + _, ok := block.List[len(block.List)-1].(*ast.ReturnStmt) + return ok +} + +// hasTerminatingAndNonterminatingReturn reports whether the block ends in a return +// statement, and also has a return elsewhere in it. +func hasTerminatingAndNonterminatingReturn(block *ast.BlockStmt) bool { + if !hasTerminatingReturn(block) { + return false + } + var ret bool + for i := range block.List[:len(block.List)-1] { + ast.Inspect(block.List[i], func(node ast.Node) bool { + _, ok := node.(*ast.ReturnStmt) + if ok { + ret = true + return false + } + return true + }) + } + return ret +} + +// everythingFollowingInParent returns a block with everything in the parent block node of the cursor after +// the cursor itself. The cursor must point to an element in a block node's list. +func everythingFollowingInParent(cursor *astutil.Cursor) *ast.BlockStmt { + parent := cursor.Parent() + block, ok := parent.(*ast.BlockStmt) + if !ok { + log.Fatal("internal error: in everythingFollowingInParent, cursor doesn't point to element in block list") + } + + blockcopy := internalastutil.CloneNode(block) // get a clean copy + blockcopy.List = blockcopy.List[cursor.Index()+1:] // and remove everything before and including stmt + + if _, ok := blockcopy.List[len(blockcopy.List)-1].(*ast.ReturnStmt); !ok { + log.Printf("%s", mustFormatNode(token.NewFileSet(), blockcopy)) + log.Fatal("internal error: parent doesn't end in a return") + } + return blockcopy +} + +// in the case that there's a return in the body being inlined (toBlock), addContinues +// replaces those returns that are not at the end of the function with the code in the +// caller after the function call that execution would continue with after the return. +// The block being added must end in a return. 
+func addContinues(cursor *astutil.Cursor, assignNode *ast.AssignStmt, toBlock *ast.BlockStmt, continueBlock *ast.BlockStmt) ast.Node { + if !hasTerminatingReturn(continueBlock) { + log.Fatal("the block being continued to in addContinues must end in a return") + } + applyFunc := func(cursor *astutil.Cursor) bool { + ret, ok := cursor.Node().(*ast.ReturnStmt) + if !ok { + return true + } + + if cursor.Parent() == toBlock && cursor.Index() == len(toBlock.List)-1 { + return false + } + + // This is the opposite of replacing a function call with the body. First + // we replace the return statement with the assignment from the caller, and + // then add the code we continue with. + replaceWithAssignment(cursor, assignNode.Lhs, ret.Results, assignNode.Tok) + cursor.InsertAfter(internalastutil.CloneNode(continueBlock)) + + return false + } + return astutil.Apply(toBlock, applyFunc, nil) +} + +// debugPrint prints out the expressions given by nodes for debugging. +func debugPrint(nodes ...ast.Expr) string { + var b strings.Builder + for i, node := range nodes { + b.Write(mustFormatNode(token.NewFileSet(), node)) + if i != len(nodes)-1 { + b.WriteString(", ") + } + } + return b.String() +} + +// mustFormatNode produces the formatted Go code for the given node. +func mustFormatNode(fset *token.FileSet, node any) []byte { + var buf bytes.Buffer + format.Node(&buf, fset, node) + return buf.Bytes() +} + +// mustMatchExprs makes sure that the expression lists have the same length, +// and returns the lists of the expressions on the lhs and rhs where the +// identifiers are not the same. These are used to produce assignment statements +// where the expressions on the right are assigned to the identifiers on the left. 
+func mustMatchExprs(lhs []ast.Expr, rhs []ast.Expr) ([]ast.Expr, []ast.Expr) { + if len(lhs) != len(rhs) { + log.Fatal("exprs don't match", debugPrint(lhs...), debugPrint(rhs...)) + } + + var newLhs, newRhs []ast.Expr + for i := range lhs { + lhsIdent, ok1 := lhs[i].(*ast.Ident) + rhsIdent, ok2 := rhs[i].(*ast.Ident) + if ok1 && ok2 && lhsIdent.Name == rhsIdent.Name { + continue + } + newLhs = append(newLhs, lhs[i]) + newRhs = append(newRhs, rhs[i]) + } + + return newLhs, newRhs +} + +// replaceWithAssignment replaces the node pointed to by the cursor with an assignment of the +// left hand side to the righthand side, removing any redundant assignments of a variable to itself, +// and replacing an assignment to a single basic literal with a constant declaration. +func replaceWithAssignment(cursor *astutil.Cursor, lhs, rhs []ast.Expr, tok token.Token) { + newLhs, newRhs := mustMatchExprs(lhs, rhs) + if len(newLhs) == 0 { + cursor.Delete() + return + } + if len(newRhs) == 1 { + if lit, ok := newRhs[0].(*ast.BasicLit); ok { + constDecl := &ast.DeclStmt{ + Decl: &ast.GenDecl{ + Tok: token.CONST, + Specs: []ast.Spec{ + &ast.ValueSpec{ + Names: []*ast.Ident{newLhs[0].(*ast.Ident)}, + Values: []ast.Expr{lit}, + }, + }, + }, + } + cursor.Replace(constDecl) + return + } + } + newAssignment := &ast.AssignStmt{ + Lhs: newLhs, + Rhs: newRhs, + Tok: tok, + } + cursor.Replace(newAssignment) +} + +// generateTable generates the file with the jump tables for the specialized malloc functions. +func generateTable(sizeToSizeClass []uint8) []byte { + scMax := sizeToSizeClass[smallScanNoHeaderMax] + + var b bytes.Buffer + fmt.Fprintln(&b, `// Code generated by mkmalloc.go; DO NOT EDIT. 
+//go:build !plan9 + +package runtime + +import "unsafe" + +var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`) + + for i := range uintptr(smallScanNoHeaderMax + 1) { + fmt.Fprintf(&b, "%s,\n", smallScanNoHeaderSCFuncName(sizeToSizeClass[i], scMax)) + } + + fmt.Fprintln(&b, ` +} + +var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`) + for i := range uintptr(smallScanNoHeaderMax + 1) { + if i < 16 { + fmt.Fprintf(&b, "%s,\n", tinyFuncName(i)) + } else { + fmt.Fprintf(&b, "%s,\n", smallNoScanSCFuncName(sizeToSizeClass[i], scMax)) + } + } + + fmt.Fprintln(&b, ` +}`) + + return b.Bytes() +} diff --git a/src/runtime/_mkmalloc/mkmalloc_test.go b/src/runtime/_mkmalloc/mkmalloc_test.go new file mode 100644 index 0000000000..bd15c3226a --- /dev/null +++ b/src/runtime/_mkmalloc/mkmalloc_test.go @@ -0,0 +1,36 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package main + +import ( + "bytes" + "os" + "testing" +) + +func TestNoChange(t *testing.T) { + classes := makeClasses() + sizeToSizeClass := makeSizeToSizeClass(classes) + + outfile := "../malloc_generated.go" + want, err := os.ReadFile(outfile) + if err != nil { + t.Fatal(err) + } + got := mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass))) + if !bytes.Equal(want, got) { + t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(want), withLineNumbers(got)) + } + + tablefile := "../malloc_tables_generated.go" + wanttable, err := os.ReadFile(tablefile) + if err != nil { + t.Fatal(err) + } + gotTable := mustFormat(generateTable(sizeToSizeClass)) + if !bytes.Equal(wanttable, gotTable) { + t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(wanttable), withLineNumbers(gotTable)) + } +} diff --git a/src/runtime/_mkmalloc/mksizeclasses.go b/src/runtime/_mkmalloc/mksizeclasses.go index a8d2d2db1e..2c39617c6b 100644 --- a/src/runtime/_mkmalloc/mksizeclasses.go +++ b/src/runtime/_mkmalloc/mksizeclasses.go @@ -31,19 +31,14 @@ import ( "bytes" "flag" "fmt" - "go/format" "io" - "log" "math" "math/bits" - "os" ) // Generate internal/runtime/gc/msize.go -var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go") - -func main() { +func generateSizeClasses(classes []class) []byte { flag.Parse() var b bytes.Buffer @@ -51,39 +46,14 @@ func main() { fmt.Fprintln(&b, "//go:generate go -C ../../../runtime/_mkmalloc run mksizeclasses.go") fmt.Fprintln(&b) fmt.Fprintln(&b, "package gc") - classes := makeClasses() printComment(&b, classes) printClasses(&b, classes) - out, err := format.Source(b.Bytes()) - if err != nil { - log.Fatal(err) - } - if *stdout { - _, err = os.Stdout.Write(out) - } else { - err = os.WriteFile("../../internal/runtime/gc/sizeclasses.go", out, 0666) - } - if err != nil { - log.Fatal(err) - } + return b.Bytes() } -const ( - // Constants that we use and will transfer to the runtime. 
- minHeapAlign = 8 - maxSmallSize = 32 << 10 - smallSizeDiv = 8 - smallSizeMax = 1024 - largeSizeDiv = 128 - pageShift = 13 - - // Derived constants. - pageSize = 1 << pageShift -) - type class struct { size int // max size npages int // number of pages @@ -294,6 +264,15 @@ func maxNPages(classes []class) int { } func printClasses(w io.Writer, classes []class) { + sizeToSizeClass := func(size int) int { + for j, c := range classes { + if c.size >= size { + return j + } + } + panic("unreachable") + } + fmt.Fprintln(w, "const (") fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign) fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize) @@ -304,6 +283,8 @@ func printClasses(w io.Writer, classes []class) { fmt.Fprintf(w, "PageShift = %d\n", pageShift) fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes)) fmt.Fprintf(w, "MaxSizeClassNPages = %d\n", maxNPages(classes)) + fmt.Fprintf(w, "TinySize = %d\n", tinySize) + fmt.Fprintf(w, "TinySizeClass = %d\n", sizeToSizeClass(tinySize)) fmt.Fprintln(w, ")") fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {") @@ -332,12 +313,7 @@ func printClasses(w io.Writer, classes []class) { sc := make([]int, smallSizeMax/smallSizeDiv+1) for i := range sc { size := i * smallSizeDiv - for j, c := range classes { - if c.size >= size { - sc[i] = j - break - } - } + sc[i] = sizeToSizeClass(size) } fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {") for _, v := range sc { @@ -349,12 +325,7 @@ func printClasses(w io.Writer, classes []class) { sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1) for i := range sc { size := smallSizeMax + i*largeSizeDiv - for j, c := range classes { - if c.size >= size { - sc[i] = j - break - } - } + sc[i] = sizeToSizeClass(size) } fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {") for _, v := range sc { diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 5b5a633d9a..db91e89359 100644 --- 
a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -127,8 +127,8 @@ const ( _64bit = 1 << (^uintptr(0) >> 63) / 2 // Tiny allocator parameters, see "Tiny allocator" comment in malloc.go. - _TinySize = 16 - _TinySizeClass = int8(2) + _TinySize = gc.TinySize + _TinySizeClass = int8(gc.TinySizeClass) _FixAllocChunk = 16 << 10 // Chunk size for FixAlloc @@ -1080,6 +1080,12 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger // at scale. const doubleCheckMalloc = false +// sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized +// mallocgc implementation: the experiment must be enabled, and none of the sanitizers should +// be enabled. The tables used to select the size-specialized malloc function do not compile +// properly on plan9, so size-specialized malloc is also disabled on plan9. +const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled + // Allocate an object of size bytes. // Small objects are allocated from the per-P cache's free lists. // Large objects (> 32 kB) are allocated straight from the heap. @@ -1110,6 +1116,17 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { return unsafe.Pointer(&zerobase) } + if sizeSpecializedMallocEnabled && heapBitsInSpan(size) { + if typ == nil || !typ.Pointers() { + return mallocNoScanTable[size](size, typ, needzero) + } else { + if !needzero { + throw("objects with pointers must be zeroed") + } + return mallocScanTable[size](size, typ, needzero) + } + } + // It's possible for any malloc to trigger sweeping, which may in // turn queue finalizers. Record this dynamic lock edge. // N.B. Compiled away if lockrank experiment is not enabled. @@ -1138,25 +1155,41 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer { // Actually do the allocation. 
var x unsafe.Pointer var elemsize uintptr - if size <= maxSmallSize-gc.MallocHeaderSize { - if typ == nil || !typ.Pointers() { - if size < maxTinySize { - x, elemsize = mallocgcTiny(size, typ) - } else { + if sizeSpecializedMallocEnabled { + // we know that heapBitsInSpan is true. + if size <= maxSmallSize-gc.MallocHeaderSize { + if typ == nil || !typ.Pointers() { x, elemsize = mallocgcSmallNoscan(size, typ, needzero) - } - } else { - if !needzero { - throw("objects with pointers must be zeroed") - } - if heapBitsInSpan(size) { - x, elemsize = mallocgcSmallScanNoHeader(size, typ) } else { + if !needzero { + throw("objects with pointers must be zeroed") + } x, elemsize = mallocgcSmallScanHeader(size, typ) } + } else { + x, elemsize = mallocgcLarge(size, typ, needzero) } } else { - x, elemsize = mallocgcLarge(size, typ, needzero) + if size <= maxSmallSize-gc.MallocHeaderSize { + if typ == nil || !typ.Pointers() { + if size < maxTinySize { + x, elemsize = mallocgcTiny(size, typ) + } else { + x, elemsize = mallocgcSmallNoscan(size, typ, needzero) + } + } else { + if !needzero { + throw("objects with pointers must be zeroed") + } + if heapBitsInSpan(size) { + x, elemsize = mallocgcSmallScanNoHeader(size, typ) + } else { + x, elemsize = mallocgcSmallScanHeader(size, typ) + } + } + } else { + x, elemsize = mallocgcLarge(size, typ, needzero) + } } // Notify sanitizers, if enabled. diff --git a/src/runtime/malloc_generated.go b/src/runtime/malloc_generated.go new file mode 100644 index 0000000000..600048c675 --- /dev/null +++ b/src/runtime/malloc_generated.go @@ -0,0 +1,8468 @@ +// Code generated by mkmalloc.go; DO NOT EDIT. 
+ +package runtime + +import ( + "internal/goarch" + "internal/runtime/sys" + "unsafe" +) + +func mallocgcSmallScanNoHeaderSC1(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 1 + + const elemsize = 8 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(0) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 8 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(8)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 8 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is 
not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(16)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 16 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, 
elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 24 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(24)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 24 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 
+ if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 32 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(32)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 32 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) 
+ *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 48 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(48)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 48 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + 
span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 64 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(64)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 64 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 80 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x 
:= unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(80)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 80 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 96 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if 
doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(96)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 96 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 112 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(112)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 112 + + 
scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 128 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(128)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 128 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + 
throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 144 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(144)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 144 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + 
dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 160 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(160)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 160 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / 
goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 176 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(176)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 176 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, 
(i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 192 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(192)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 192 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, 
i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 208 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(208)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 208 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 224 + + span.base()) + } + } + 
} + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(224)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 224 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 240 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && 
sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(240)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 240 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 256 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(256)) { + throw("tried to write heap bits, but no 
heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 256 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 288 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(288)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 288 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } 
else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 320 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(320)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 320 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += 
typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 352 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(352)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 352 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / 
goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 384 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(384)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 384 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + 
+ bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 416 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(416)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 416 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = 
(*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 448 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(448)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 448 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + 
nextFreeFastResult = gclinkptr(uintptr(result)* + 480 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(480)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 480 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 512 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 
0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + + c.scanAlloc += 8 + } else { + dataSize := size + x := uintptr(x) + + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(512)) { + throw("tried to write heap bits, but no heap bits in span") + } + + src0 := readUintptr(getGCMask(typ)) + + const elemsize = 512 + + scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<> bits0) + } else { + + dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + 
gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny2(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 2 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes 
= 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny3(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 3 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { 
+ doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if 
checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny4(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 4 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < 
nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny5(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 5 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 
== 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + 
assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny6(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 6 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + 
v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny7(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 7 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + 
releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny8(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == 
_GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 8 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = 
uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny9(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 9 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + 
} + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny10(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + 
deductAssistCredit(size) + } + + const constsize = 10 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 
0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny11(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 11 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache 
!= 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny12(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 12 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = 
alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, 
elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny13(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 13 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + 
span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocTiny14(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 14 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= 
maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return 
x +} + +func mallocTiny15(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const constsize = 15 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + c := getMCache(mp) + off := c.tinyoffset + + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + const elemsize = 0 + { + + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x + } + + } + + checkGCTrigger := false + span := c.alloc[tinySpanClass] + + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / + 16, + ) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + 
(*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + + x = add(x, elemsize-constsize) + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 2 + + const elemsize = 16 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 16 + + span.base()) + } + } + } + v := 
nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 3 + + const elemsize = 24 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 24 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, 
checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 4 + + const elemsize = 32 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 32 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := 
unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 5 + + const elemsize = 48 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 48 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero 
!= 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 6 + + const elemsize = 64 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 64 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + 
+ publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 7 + + const elemsize = 80 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 80 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if 
writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 8 + + const elemsize = 96 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 96 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, 
uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 9 + + const elemsize = 112 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 112 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + 
span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 10 + + const elemsize = 128 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 128 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + 
+ c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 11 + + const elemsize = 144 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 144 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if 
c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 12 + + const elemsize = 160 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 160 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != 
c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 13 + + const elemsize = 176 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 176 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, 
elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 14 + + const elemsize = 192 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 192 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + 
releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 15 + + const elemsize = 208 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 208 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t 
:= (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 16 + + const elemsize = 224 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 224 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); 
t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 17 + + const elemsize = 240 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 240 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if 
gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 18 + + const elemsize = 256 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 256 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + 
if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 19 + + const elemsize = 288 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 288 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil 
{ + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 20 + + const elemsize = 320 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 320 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize 
- size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 21 + + const elemsize = 352 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 352 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + 
postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 22 + + const elemsize = 384 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 384 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + 
return x +} + +func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 23 + + const elemsize = 416 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 416 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func 
mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 24 + + const elemsize = 448 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 448 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC25(size uintptr, typ 
*_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 25 + + const elemsize = 480 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 480 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if 
doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + lockRankMayQueueFinalizer() + + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + const sizeclass = 26 + + const elemsize = 512 + + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(1) + span := c.alloc[spc] + + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)* + 512 + + span.base()) + } + } + } + v := nextFreeFastResult + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + publicationBarrier() + + if writeBarrier.enabled { + + gcmarknewobject(span, uintptr(x)) + } else { + + span.freeIndexForScan = span.freeindex + } + + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} diff --git a/src/runtime/malloc_stubs.go b/src/runtime/malloc_stubs.go new file mode 100644 index 0000000000..7fd1444189 --- /dev/null +++ 
b/src/runtime/malloc_stubs.go @@ -0,0 +1,586 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file contains stub functions that are not meant to be called directly, +// but that will be assembled together using the inlining logic in runtime/_mkmalloc +// to produce a full mallocgc function that's specialized for a span class +// or specific size in the case of the tiny allocator. +// +// To assemble a mallocgc function, the mallocStub function is cloned, and the call to +// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub, +// smallNoScanStub or tinyStub, depending on the parameters being specialized. +// +// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases) +// identifiers are replaced with the value of the parameter in the specialized case. +// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub +// functions are also inlined by _mkmalloc. + +package runtime + +import ( + "internal/goarch" + "internal/runtime/sys" + "unsafe" +) + +// These identifiers will all be replaced by the inliner. So their values don't +// really matter: they just need to be set so that the stub functions, which +// will never be used on their own, can compile. elemsize_ can't be set to +// zero because we divide by it in nextFreeFastTiny, and the compiler would +// complain about a division by zero. Its replaced value will always be greater +// than zero. +const elemsize_ = 8 +const sizeclass_ = 0 +const noscanint_ = 0 +const size_ = 0 + +func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + // Short-circuit zero-sized allocation requests. 
+ return unsafe.Pointer(&zerobase) +} + +func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + panic("not defined for sizeclass") +} + +func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer { + if doubleCheckMalloc { + if gcphase == _GCmarktermination { + throw("mallocgc called with gcphase == _GCmarktermination") + } + } + + // It's possible for any malloc to trigger sweeping, which may in + // turn queue finalizers. Record this dynamic lock edge. + // N.B. Compiled away if lockrank experiment is not enabled. + lockRankMayQueueFinalizer() + + // Pre-malloc debug hooks. + if debug.malloc { + if x := preMallocgcDebug(size, typ); x != nil { + return x + } + } + + // Assist the GC if needed. + if gcBlackenEnabled != 0 { + deductAssistCredit(size) + } + + // Actually do the allocation. + x, elemsize := inlinedMalloc(size, typ, needzero) + + // Adjust our GC assist debt to account for internal fragmentation. + if gcBlackenEnabled != 0 && elemsize != 0 { + if assistG := getg().m.curg; assistG != nil { + assistG.gcAssistBytes -= int64(elemsize - size) + } + } + + // Post-malloc debug hooks. + if debug.malloc { + postMallocgcDebug(x, elemsize, typ) + } + return x +} + +// inlinedMalloc will never be called. It is defined just so that the compiler can compile +// the mallocStub function, which will also never be called, but instead used as a template +// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub +// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub +// when generating the size-specialized malloc function. See the comment at the top of this +// file for more information. 
+func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + return unsafe.Pointer(uintptr(0)), 0 +} + +func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ == nil || !typ.Pointers() { + throw("noscan allocated in scan-only path") + } + if !heapBitsInSpan(size) { + throw("heap bits in not in span for non-header-only path") + } +} + +func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + const sizeclass = sizeclass_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallScanNoHeader(size, typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(noscanint_) + span := c.alloc[spc] + v := nextFreeFastStub(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + if goarch.PtrSize == 8 && sizeclass == 1 { + // initHeapBits already set the pointer bits for the 8-byte sizeclass + // on 64-bit platforms. + c.scanAlloc += 8 + } else { + dataSize := size // make the inliner happy + x := uintptr(x) + scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span) + c.scanAlloc += scanSize + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. 
+ // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. + // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + return x, elemsize +} + +func doubleCheckSmallNoScan(typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ != nil && typ.Pointers() { + throw("expected noscan type for noscan alloc") + } +} + +func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + // TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant + // sizeclass_ and elemsize_ value (instead just set to the expressions to look up the size class + // and elemsize. 
We'd also need to teach mkmalloc that values that are touched by these (specifically + // spc below) should turn into vars. This would allow us to generate mallocgcSmallNoScan itself, + // so that its code could not diverge from the generated functions. + const sizeclass = sizeclass_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckSmallNoScan(typ, mp) + } + mp.mallocing = 1 + + checkGCTrigger := false + c := getMCache(mp) + const spc = spanClass(sizeclass<<1) | spanClass(noscanint_) + span := c.alloc[spc] + v := nextFreeFastStub(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(spc) + } + x := unsafe.Pointer(v) + if needzero && span.needzero != 0 { + memclrNoHeapPointers(x, elemsize) + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. 
+ span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. + // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + return x, elemsize +} + +func doubleCheckTiny(size uintptr, typ *_type, mp *m) { + if mp.mallocing != 0 { + throw("malloc deadlock") + } + if mp.gsignal == getg() { + throw("malloc during signal") + } + if typ != nil && typ.Pointers() { + throw("expected noscan for tiny alloc") + } +} + +func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) { + const constsize = size_ + const elemsize = elemsize_ + + // Set mp.mallocing to keep from being preempted by GC. + mp := acquirem() + if doubleCheckMalloc { + doubleCheckTiny(constsize, typ, mp) + } + mp.mallocing = 1 + + // Tiny allocator. + // + // Tiny allocator combines several tiny allocation requests + // into a single memory block. The resulting memory block + // is freed when all subobjects are unreachable. The subobjects + // must be noscan (don't have pointers), this ensures that + // the amount of potentially wasted memory is bounded. + // + // Size of the memory block used for combining (maxTinySize) is tunable. + // Current setting is 16 bytes, which relates to 2x worst case memory + // wastage (when all but one subobjects are unreachable). + // 8 bytes would result in no wastage at all, but provides less + // opportunities for combining. 
+ // 32 bytes provides more opportunities for combining, + // but can lead to 4x worst case wastage. + // The best case winning is 8x regardless of block size. + // + // Objects obtained from tiny allocator must not be freed explicitly. + // So when an object will be freed explicitly, we ensure that + // its size >= maxTinySize. + // + // SetFinalizer has a special case for objects potentially coming + // from tiny allocator, it such case it allows to set finalizers + // for an inner byte of a memory block. + // + // The main targets of tiny allocator are small strings and + // standalone escaping variables. On a json benchmark + // the allocator reduces number of allocations by ~12% and + // reduces heap size by ~20%. + c := getMCache(mp) + off := c.tinyoffset + // Align tiny pointer for required (conservative) alignment. + if constsize&7 == 0 { + off = alignUp(off, 8) + } else if goarch.PtrSize == 4 && constsize == 12 { + // Conservatively align 12-byte objects to 8 bytes on 32-bit + // systems so that objects whose first field is a 64-bit + // value is aligned to 8 bytes and does not cause a fault on + // atomic access. See issue 37262. + // TODO(mknyszek): Remove this workaround if/when issue 36606 + // is resolved. + off = alignUp(off, 8) + } else if constsize&3 == 0 { + off = alignUp(off, 4) + } else if constsize&1 == 0 { + off = alignUp(off, 2) + } + if off+constsize <= maxTinySize && c.tiny != 0 { + // The object fits into existing tiny block. + x := unsafe.Pointer(c.tiny + off) + c.tinyoffset = off + constsize + c.tinyAllocs++ + mp.mallocing = 0 + releasem(mp) + return x, 0 + } + // Allocate a new maxTinySize block. 
+ checkGCTrigger := false + span := c.alloc[tinySpanClass] + v := nextFreeFastTiny(span) + if v == 0 { + v, span, checkGCTrigger = c.nextFree(tinySpanClass) + } + x := unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 // Always zero + (*[2]uint64)(x)[1] = 0 + // See if we need to replace the existing tiny block with the new one + // based on amount of remaining free space. + if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) { + // Note: disabled when race detector is on, see comment near end of this function. + c.tiny = uintptr(x) + c.tinyoffset = constsize + } + + // Ensure that the stores above that initialize x to + // type-safe memory and set the heap bits occur before + // the caller can make x observable to the garbage + // collector. Otherwise, on weakly ordered machines, + // the garbage collector could follow a pointer to x, + // but see uninitialized memory or stale heap bits. + publicationBarrier() + + if writeBarrier.enabled { + // Allocate black during GC. + // All slots hold nil so no scanning is needed. + // This may be racing with GC so do it atomically if there can be + // a race marking the bit. + gcmarknewobject(span, uintptr(x)) + } else { + // Track the last free index before the mark phase. This field + // is only used by the garbage collector. During the mark phase + // this is used by the conservative scanner to filter out objects + // that are both free and recently-allocated. It's safe to do that + // because we allocate-black if the GC is enabled. The conservative + // scanner produces pointers out of thin air, so without additional + // synchronization it might otherwise observe a partially-initialized + // object, which could crash the program. + span.freeIndexForScan = span.freeindex + } + + // Note cache c only valid while m acquired; see #47302 + // + // N.B. Use the full size because that matches how the GC + // will update the mem profile on the "free" side. 
+ // + // TODO(mknyszek): We should really count the header as part + // of gc_sys or something. The code below just pretends it is + // internal fragmentation and matches the GC's accounting by + // using the whole allocation slot. + c.nextSample -= int64(elemsize) + if c.nextSample < 0 || MemProfileRate != c.memProfRate { + profilealloc(mp, x, elemsize) + } + mp.mallocing = 0 + releasem(mp) + + if checkGCTrigger { + if t := (gcTrigger{kind: gcTriggerHeap}); t.test() { + gcStart(t) + } + } + + if raceenabled { + // Pad tinysize allocations so they are aligned with the end + // of the tinyalloc region. This ensures that any arithmetic + // that goes off the top end of the object will be detectable + // by checkptr (issue 38872). + // Note that we disable tinyalloc when raceenabled for this to work. + // TODO: This padding is only performed when the race detector + // is enabled. It would be nice to enable it if any package + // was compiled with checkptr, but there's no easy way to + // detect that (especially at compile time). + // TODO: enable this padding for all allocations, not just + // tinyalloc ones. It's tricky because of pointer maps. + // Maybe just all noscan objects? + x = add(x, elemsize-constsize) + } + return x, elemsize +} + +// TODO(matloob): Should we let the go compiler inline this instead of using mkmalloc? +// We won't be able to use elemsize_ but that's probably ok. +func nextFreeFastTiny(span *mspan) gclinkptr { + const nbytes = 8192 + const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_) + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache? 
+ result := span.freeindex + uint16(theBit) + if result < nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base()) + } + } + } + return nextFreeFastResult +} + +func nextFreeFastStub(span *mspan) gclinkptr { + var nextFreeFastResult gclinkptr + if span.allocCache != 0 { + theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache? + result := span.freeindex + uint16(theBit) + if result < span.nelems { + freeidx := result + 1 + if !(freeidx%64 == 0 && freeidx != span.nelems) { + span.allocCache >>= uint(theBit + 1) + span.freeindex = freeidx + span.allocCount++ + nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base()) + } + } + } + return nextFreeFastResult +} + +func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr { + if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) { + throw("tried to write heap bits, but no heap bits in span") + } + scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ) + if doubleCheckHeapSetType { + doubleCheckHeapType(x, dataSize, typ, nil, span) + } + return scanSize +} + +// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is +// stored as a bitmap at the end of the span. +// +// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span. +// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_. +// +//go:nosplit +func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr { + // The objects here are always really small, so a single load is sufficient. + src0 := readUintptr(getGCMask(typ)) + + const elemsize = elemsize_ + + // Create repetitions of the bitmap if we have a small slice backing store. 
+ scanSize := typ.PtrBytes + src := src0 + if typ.Size_ == goarch.PtrSize { + src = (1 << (dataSize / goarch.PtrSize)) - 1 + } else { + // N.B. We rely on dataSize being an exact multiple of the type size. + // The alternative is to be defensive and mask out src to the length + // of dataSize. The purpose is to save on one additional masking operation. + if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 { + throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_") + } + for i := typ.Size_; i < dataSize; i += typ.Size_ { + src |= src0 << (i / goarch.PtrSize) + scanSize += typ.Size_ + } + } + + // Since we're never writing more than one uintptr's worth of bits, we're either going + // to do one or two writes. + dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize) + dst := unsafe.Pointer(dstBase) + o := (x - span.base()) / goarch.PtrSize + i := o / ptrBits + j := o % ptrBits + const bits uintptr = elemsize / goarch.PtrSize + // In the if statement below, we have to do two uintptr writes if the bits + // we need to write straddle across two different memory locations. But if + // the number of bits we're writing divides evenly into the number of bits + // in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo + // is a compile-time constant in the generated code, in the case where the size is + // a power of two less than or equal to ptrBits, the compiler can remove the + // 'two writes' branch of the if statement and always do only one write without + // the check. + const bitsIsPowerOfTwo = bits&(bits-1) == 0 + if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) { + // Two writes. + bits0 := ptrBits - j + bits1 := bits - bits0 + dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize)) + dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize)) + *dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j) + *dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0) + } else { + // One write. 
+ dst := (*uintptr)(add(dst, i*goarch.PtrSize)) + *dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // if bits > ptrbits we always take the other branch + } + + const doubleCheck = false + if doubleCheck { + writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ) + } + return scanSize +} + +func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) { + srcRead := span.heapBitsSmallForAddr(x) + if srcRead != src { + print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n") + print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n") + print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n") + throw("bad pointer bits written for small object") + } +} diff --git a/src/runtime/malloc_tables_generated.go b/src/runtime/malloc_tables_generated.go new file mode 100644 index 0000000000..36650881fe --- /dev/null +++ b/src/runtime/malloc_tables_generated.go @@ -0,0 +1,1038 @@ +// Code generated by mkmalloc.go; DO NOT EDIT. 
+//go:build !plan9 + +package runtime + +import "unsafe" + +var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{ + mallocPanic, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC1, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC2, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC3, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC4, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC5, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + 
mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC6, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC7, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC8, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC9, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + 
mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC10, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC11, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC12, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC13, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + 
mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC14, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC15, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC16, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC17, + 
mallocgcSmallScanNoHeaderSC17, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC18, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC19, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + 
mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC20, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC21, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + 
mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC22, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC23, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + 
mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC24, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC25, + 
mallocgcSmallScanNoHeaderSC25, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, + mallocgcSmallScanNoHeaderSC26, +} + +var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{ + mallocPanic, + mallocTiny1, + mallocTiny2, + mallocTiny3, + mallocTiny4, + mallocTiny5, + mallocTiny6, + mallocTiny7, + mallocTiny8, + mallocTiny9, + mallocTiny10, + mallocTiny11, + mallocTiny12, + mallocTiny13, + mallocTiny14, + mallocTiny15, + mallocgcSmallNoScanSC2, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC3, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC4, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + 
mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC5, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC6, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC7, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC8, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + mallocgcSmallNoScanSC9, + 
mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC10, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC11, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC12, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC13, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + 
mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC14, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC15, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC16, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC17, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC18, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + 
mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC19, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC20, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + 
mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC21, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC22, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + 
mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC23, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC24, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC25, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + 
mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, + mallocgcSmallNoScanSC26, +} diff --git a/src/runtime/malloc_tables_plan9.go b/src/runtime/malloc_tables_plan9.go new file mode 100644 index 0000000000..4d2740bbb2 --- /dev/null +++ b/src/runtime/malloc_tables_plan9.go @@ -0,0 +1,14 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build plan9 + +package runtime + +import "unsafe" + +var ( + mallocScanTable []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer + mallocNoScanTable []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer +) diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go index 6cd525d5e9..bf58947bbc 100644 --- a/src/runtime/malloc_test.go +++ b/src/runtime/malloc_test.go @@ -452,3 +452,13 @@ func BenchmarkGoroutineIdle(b *testing.B) { close(quit) time.Sleep(10 * time.Millisecond) } + +func TestMkmalloc(t *testing.T) { + testenv.MustHaveGoRun(t) + testenv.MustHaveExternalNetwork(t) // To download the golang.org/x/tools dependency. 
+ output, err := exec.Command("go", "-C", "_mkmalloc", "test").CombinedOutput() + t.Logf("test output:\n%s", output) + if err != nil { + t.Errorf("_mkmalloc tests failed: %v", err) + } +} -- cgit v1.3-5-g9baa From d7abfe4f0dc91568648a66495b9f5d7ebc0f22b5 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Fri, 30 May 2025 17:05:41 -0400 Subject: runtime: acquire/release C TSAN lock when calling cgo symbolizer/tracebacker When calling into C via cmd/cgo, the generated code calls _cgo_tsan_acquire / _cgo_tsan_release around the C call to report a dummy lock to the C/C++ TSAN runtime. This is necessary because the C/C++ TSAN runtime does not understand synchronization within Go and would otherwise report false positive race reports. See the comment in cmd/cgo/out.go for more details. Various C functions in runtime/cgo also contain manual calls to _cgo_tsan_acquire/release where necessary to suppress race reports. However, the cgo symbolizer and cgo traceback functions called from callCgoSymbolizer and cgoContextPCs, respectively, do not have any instrumentation [1]. They call directly into user C functions with no TSAN instrumentation. This means they have an opportunity to report false race conditions. The most direct way is via their argument. Both are passed a pointer to a struct stored on the Go stack, and both write to fields of the struct. If two calls are passed the same pointer from different threads, the C TSAN runtime will think this is a race. This is simple to achieve for the cgo symbolizer function, which the new regression test does. callCgoSymbolizer is called on the standard goroutine stack, so the argument is a pointer into the goroutine stack. If the goroutine moves Ms between two calls, it will look like a race. On the other hand, cgoContextPCs is called on the system stack. 
Each M has a unique system stack, so for it to pass the same argument pointer on different threads would require the first M to exit, free its stack, and the same region of address space to be used as the stack for a new M. Theoretically possible, but quite unlikely. Both of these are addressed by providing a C wrapper in runtime/cgo that calls _cgo_tsan_acquire/_cgo_tsan_release around calls to the symbolizer and traceback functions. There is a lot of room for future cleanup here. Most runtime/cgo functions have manual instrumentation in their C implementation. That could be removed in favor of instrumentation in the runtime. We could even theoretically remove the instrumentation from cmd/cgo and move it to cgocall. None of these are necessary, but may make things more consistent and easier to follow. [1] Note that the cgo traceback function called from the signal handler via x_cgo_callers _does_ have manual instrumentation. Fixes #73949. Cq-Include-Trybots: luci.golang.try:gotip-freebsd-amd64,gotip-linux-amd64-longtest,gotip-windows-amd64-longtest Change-Id: I6a6a636c9daa38f7fd00694af76b75cb93ba1886 Reviewed-on: https://go-review.googlesource.com/c/go/+/677955 Reviewed-by: Michael Knyszek Auto-Submit: Michael Pratt Reviewed-by: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI --- .../testdata/tsan_tracebackctxt/main.go | 78 +++++++++++++++++++++ .../testdata/tsan_tracebackctxt/tracebackctxt_c.c | 70 +++++++++++++++++++ src/cmd/cgo/internal/testsanitizers/tsan_test.go | 3 +- src/runtime/cgo.go | 8 ++- src/runtime/cgo/callbacks.go | 31 +++++++-- src/runtime/cgo/gcc_context.c | 4 +- src/runtime/cgo/gcc_libinit.c | 79 ++++++++++++++++++--- src/runtime/cgo/gcc_libinit_windows.c | 80 +++++++++++++++++++--- src/runtime/cgo/libcgo.h | 52 +++++++++++--- src/runtime/symtab.go | 4 +- src/runtime/testdata/testprog/setcgotraceback.go | 45 ++++++++++++ src/runtime/traceback.go | 47 +++++++++---- src/runtime/traceback_test.go | 15 ++++ 13 files changed, 462 insertions(+), 54 
deletions(-) create mode 100644 src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/main.go create mode 100644 src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/tracebackctxt_c.c create mode 100644 src/runtime/testdata/testprog/setcgotraceback.go (limited to 'src/runtime') diff --git a/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/main.go b/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/main.go new file mode 100644 index 0000000000..998a08ca53 --- /dev/null +++ b/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/main.go @@ -0,0 +1,78 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +/* +// Defined in tracebackctxt_c.c. +extern void C1(void); +extern void C2(void); +extern void tcContext(void*); +extern void tcTraceback(void*); +extern void tcSymbolizer(void*); +*/ +import "C" + +import ( + "fmt" + "runtime" + "sync" + "unsafe" +) + +// Regression test for https://go.dev/issue/73949. TSAN should not report races +// on writes to the argument passed to the symbolizer function. +// +// Triggering this race requires calls to the symbolizer function with the same +// argument pointer on multiple threads. The runtime passes a stack variable to +// this function, so that means we need to get a single goroutine to execute on +// two threads, calling the symbolizer function on each. +// +// runtime.CallersFrames / Next will call the symbolizer function (if there are +// C frames). So the approach here is, with GOMAXPROCS=2, have 2 goroutines +// that use CallersFrames over and over, both frequently calling Gosched in an +// attempt to get picked up by the other P. 
+
+var tracebackOK bool
+
+func main() {
+	runtime.GOMAXPROCS(2)
+	runtime.SetCgoTraceback(0, unsafe.Pointer(C.tcTraceback), unsafe.Pointer(C.tcContext), unsafe.Pointer(C.tcSymbolizer))
+	C.C1()
+	if tracebackOK {
+		fmt.Println("OK")
+	}
+}
+
+//export G1
+func G1() {
+	C.C2()
+}
+
+//export G2
+func G2() {
+	pc := make([]uintptr, 32)
+	n := runtime.Callers(0, pc)
+
+	var wg sync.WaitGroup
+	for range 2 {
+		wg.Go(func() {
+			for range 1000 {
+				cf := runtime.CallersFrames(pc[:n])
+				var frames []runtime.Frame
+				for {
+					frame, more := cf.Next()
+					frames = append(frames, frame)
+					if !more {
+						break
+					}
+				}
+				runtime.Gosched()
+			}
+		})
+	}
+	wg.Wait()
+
+	tracebackOK = true
+}
diff --git a/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/tracebackctxt_c.c b/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/tracebackctxt_c.c
new file mode 100644
index 0000000000..9ddaa4aaf2
--- /dev/null
+++ b/src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/tracebackctxt_c.c
@@ -0,0 +1,70 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The C definitions for tracebackctxt.go. That file uses //export so
+// it can't put function definitions in the "C" import comment.
+
+#include <stdint.h>
+#include <stdio.h>
+
+// Functions exported from Go.
+extern void G1(void); +extern void G2(void); + +void C1() { + G1(); +} + +void C2() { + G2(); +} + +struct cgoContextArg { + uintptr_t context; +}; + +struct cgoTracebackArg { + uintptr_t context; + uintptr_t sigContext; + uintptr_t* buf; + uintptr_t max; +}; + +struct cgoSymbolizerArg { + uintptr_t pc; + const char* file; + uintptr_t lineno; + const char* func; + uintptr_t entry; + uintptr_t more; + uintptr_t data; +}; + +void tcContext(void* parg) { + struct cgoContextArg* arg = (struct cgoContextArg*)(parg); + if (arg->context == 0) { + arg->context = 1; + } +} + +void tcTraceback(void* parg) { + int base, i; + struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg); + if (arg->max < 1) { + return; + } + arg->buf[0] = 6; // Chosen by fair dice roll. +} + +void tcSymbolizer(void *parg) { + struct cgoSymbolizerArg* arg = (struct cgoSymbolizerArg*)(parg); + if (arg->pc == 0) { + return; + } + // Report two lines per PC returned by traceback, to test more handling. + arg->more = arg->file == NULL; + arg->file = "tracebackctxt.go"; + arg->func = "cFunction"; + arg->lineno = arg->pc + (arg->more << 16); +} diff --git a/src/cmd/cgo/internal/testsanitizers/tsan_test.go b/src/cmd/cgo/internal/testsanitizers/tsan_test.go index 265c5e3605..589db2e6bc 100644 --- a/src/cmd/cgo/internal/testsanitizers/tsan_test.go +++ b/src/cmd/cgo/internal/testsanitizers/tsan_test.go @@ -56,6 +56,7 @@ func TestTSAN(t *testing.T) { {src: "tsan13.go", needsRuntime: true}, {src: "tsan14.go", needsRuntime: true}, {src: "tsan15.go", needsRuntime: true}, + {src: "tsan_tracebackctxt", needsRuntime: true}, // Subdirectory } for _, tc := range cases { tc := tc @@ -67,7 +68,7 @@ func TestTSAN(t *testing.T) { defer dir.RemoveAll(t) outPath := dir.Join(name) - mustRun(t, config.goCmd("build", "-o", outPath, srcPath(tc.src))) + mustRun(t, config.goCmd("build", "-o", outPath, "./"+srcPath(tc.src))) cmdArgs := []string{outPath} if goos == "linux" { diff --git a/src/runtime/cgo.go 
b/src/runtime/cgo.go index eca905bad9..60f2403d73 100644 --- a/src/runtime/cgo.go +++ b/src/runtime/cgo.go @@ -15,7 +15,9 @@ import "unsafe" //go:linkname _cgo_sys_thread_create _cgo_sys_thread_create //go:linkname _cgo_notify_runtime_init_done _cgo_notify_runtime_init_done //go:linkname _cgo_callers _cgo_callers -//go:linkname _cgo_set_context_function _cgo_set_context_function +//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions +//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function +//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function //go:linkname _cgo_yield _cgo_yield //go:linkname _cgo_pthread_key_created _cgo_pthread_key_created //go:linkname _cgo_bindm _cgo_bindm @@ -27,7 +29,9 @@ var ( _cgo_sys_thread_create unsafe.Pointer _cgo_notify_runtime_init_done unsafe.Pointer _cgo_callers unsafe.Pointer - _cgo_set_context_function unsafe.Pointer + _cgo_set_traceback_functions unsafe.Pointer + _cgo_call_traceback_function unsafe.Pointer + _cgo_call_symbolizer_function unsafe.Pointer _cgo_yield unsafe.Pointer _cgo_pthread_key_created unsafe.Pointer _cgo_bindm unsafe.Pointer diff --git a/src/runtime/cgo/callbacks.go b/src/runtime/cgo/callbacks.go index 3c246a88b6..986f61914f 100644 --- a/src/runtime/cgo/callbacks.go +++ b/src/runtime/cgo/callbacks.go @@ -121,13 +121,30 @@ var _cgo_bindm = &x_cgo_bindm var x_cgo_notify_runtime_init_done byte var _cgo_notify_runtime_init_done = &x_cgo_notify_runtime_init_done -// Sets the traceback context function. See runtime.SetCgoTraceback. - -//go:cgo_import_static x_cgo_set_context_function -//go:linkname x_cgo_set_context_function x_cgo_set_context_function -//go:linkname _cgo_set_context_function _cgo_set_context_function -var x_cgo_set_context_function byte -var _cgo_set_context_function = &x_cgo_set_context_function +// Sets the traceback, context, and symbolizer functions. See +// runtime.SetCgoTraceback. 
+ +//go:cgo_import_static x_cgo_set_traceback_functions +//go:linkname x_cgo_set_traceback_functions x_cgo_set_traceback_functions +//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions +var x_cgo_set_traceback_functions byte +var _cgo_set_traceback_functions = &x_cgo_set_traceback_functions + +// Call the traceback function registered with x_cgo_set_traceback_functions. + +//go:cgo_import_static x_cgo_call_traceback_function +//go:linkname x_cgo_call_traceback_function x_cgo_call_traceback_function +//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function +var x_cgo_call_traceback_function byte +var _cgo_call_traceback_function = &x_cgo_call_traceback_function + +// Call the symbolizer function registered with x_cgo_set_symbolizer_functions. + +//go:cgo_import_static x_cgo_call_symbolizer_function +//go:linkname x_cgo_call_symbolizer_function x_cgo_call_symbolizer_function +//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function +var x_cgo_call_symbolizer_function byte +var _cgo_call_symbolizer_function = &x_cgo_call_symbolizer_function // Calls a libc function to execute background work injected via libc // interceptors, such as processing pending signals under the thread diff --git a/src/runtime/cgo/gcc_context.c b/src/runtime/cgo/gcc_context.c index ad58692821..b647c99a98 100644 --- a/src/runtime/cgo/gcc_context.c +++ b/src/runtime/cgo/gcc_context.c @@ -8,11 +8,11 @@ // Releases the cgo traceback context. 
void _cgo_release_context(uintptr_t ctxt) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); pfn = _cgo_get_context_function(); if (ctxt != 0 && pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = ctxt; (*pfn)(&arg); diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c index e9b0a3f769..05998fadf8 100644 --- a/src/runtime/cgo/gcc_libinit.c +++ b/src/runtime/cgo/gcc_libinit.c @@ -32,8 +32,14 @@ static void pthread_key_destructor(void* g); uintptr_t x_cgo_pthread_key_created; void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t); +// The traceback function, used when tracing C calls. +static void (*cgo_traceback_function)(struct cgoTracebackArg*); + // The context function, used when tracing back C calls into Go. -static void (*cgo_context_function)(struct context_arg*); +static void (*cgo_context_function)(struct cgoContextArg*); + +// The symbolizer function, used when symbolizing C frames. +static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*); void x_cgo_sys_thread_create(void* (*func)(void*), void* arg) { @@ -52,7 +58,7 @@ x_cgo_sys_thread_create(void* (*func)(void*), void* arg) { uintptr_t _cgo_wait_runtime_init_done(void) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); int done; pfn = __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); @@ -70,7 +76,6 @@ _cgo_wait_runtime_init_done(void) { x_cgo_pthread_key_created = 1; } - // TODO(iant): For the case of a new C thread calling into Go, such // as when using -buildmode=c-archive, we know that Go runtime // initialization is complete but we do not know that all Go init @@ -87,7 +92,7 @@ _cgo_wait_runtime_init_done(void) { } if (pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = 0; (*pfn)(&arg); @@ -138,17 +143,71 @@ x_cgo_notify_runtime_init_done(void* dummy __attribute__ ((unused))) { pthread_mutex_unlock(&runtime_init_mu); } -// Sets the context 
function to call to record the traceback context -// when calling a Go function from C code. Called from runtime.SetCgoTraceback. -void x_cgo_set_context_function(void (*context)(struct context_arg*)) { - __atomic_store_n(&cgo_context_function, context, __ATOMIC_RELEASE); +// Sets the traceback, context, and symbolizer functions. Called from +// runtime.SetCgoTraceback. +void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) { + __atomic_store_n(&cgo_traceback_function, arg->Traceback, __ATOMIC_RELEASE); + __atomic_store_n(&cgo_context_function, arg->Context, __ATOMIC_RELEASE); + __atomic_store_n(&cgo_symbolizer_function, arg->Symbolizer, __ATOMIC_RELEASE); +} + +// Gets the traceback function to call to trace C calls. +void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) { + return __atomic_load_n(&cgo_traceback_function, __ATOMIC_CONSUME); +} + +// Call the traceback function registered with x_cgo_set_traceback_functions. +// +// The traceback function is an arbitrary user C function which may be built +// with TSAN, and thus must be wrapped with TSAN acquire/release calls. For +// normal cgo calls, cmd/cgo automatically inserts TSAN acquire/release calls. +// Since the traceback, context, and symbolizer functions are registered at +// startup and called via the runtime, they do not get automatic TSAN +// acquire/release calls. +// +// The only purpose of this wrapper is to perform TSAN acquire/release. +// Alternatively, if the runtime arranged to safely call TSAN acquire/release, +// it could perform the call directly. +void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) { + void (*pfn)(struct cgoTracebackArg*); + + pfn = _cgo_get_traceback_function(); + if (pfn == nil) { + return; + } + + _cgo_tsan_acquire(); + (*pfn)(arg); + _cgo_tsan_release(); } -// Gets the context function. 
-void (*(_cgo_get_context_function(void)))(struct context_arg*) { +// Gets the context function to call to record the traceback context +// when calling a Go function from C code. +void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) { return __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); } +// Gets the symbolizer function to call to symbolize C frames. +void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) { + return __atomic_load_n(&cgo_symbolizer_function, __ATOMIC_CONSUME); +} + +// Call the symbolizer function registered with x_cgo_set_traceback_functions. +// +// See comment on x_cgo_call_traceback_function. +void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) { + void (*pfn)(struct cgoSymbolizerArg*); + + pfn = _cgo_get_symbolizer_function(); + if (pfn == nil) { + return; + } + + _cgo_tsan_acquire(); + (*pfn)(arg); + _cgo_tsan_release(); +} + // _cgo_try_pthread_create retries pthread_create if it fails with // EAGAIN. int diff --git a/src/runtime/cgo/gcc_libinit_windows.c b/src/runtime/cgo/gcc_libinit_windows.c index 9275185d6e..926f916843 100644 --- a/src/runtime/cgo/gcc_libinit_windows.c +++ b/src/runtime/cgo/gcc_libinit_windows.c @@ -32,6 +32,7 @@ static CRITICAL_SECTION runtime_init_cs; static HANDLE runtime_init_wait; static int runtime_init_done; +// No pthreads on Windows, these are always zero. 
uintptr_t x_cgo_pthread_key_created; void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t); @@ -81,7 +82,7 @@ _cgo_is_runtime_initialized() { uintptr_t _cgo_wait_runtime_init_done(void) { - void (*pfn)(struct context_arg*); + void (*pfn)(struct cgoContextArg*); _cgo_maybe_run_preinit(); while (!_cgo_is_runtime_initialized()) { @@ -89,7 +90,7 @@ _cgo_wait_runtime_init_done(void) { } pfn = _cgo_get_context_function(); if (pfn != nil) { - struct context_arg arg; + struct cgoContextArg arg; arg.Context = 0; (*pfn)(&arg); @@ -118,20 +119,54 @@ x_cgo_notify_runtime_init_done(void* dummy) { } } +// The traceback function, used when tracing C calls. +static void (*cgo_traceback_function)(struct cgoTracebackArg*); + // The context function, used when tracing back C calls into Go. -static void (*cgo_context_function)(struct context_arg*); +static void (*cgo_context_function)(struct cgoContextArg*); + +// The symbolizer function, used when symbolizing C frames. +static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*); + +// Sets the traceback, context, and symbolizer functions. Called from +// runtime.SetCgoTraceback. +void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) { + EnterCriticalSection(&runtime_init_cs); + cgo_traceback_function = arg->Traceback; + cgo_context_function = arg->Context; + cgo_symbolizer_function = arg->Symbolizer; + LeaveCriticalSection(&runtime_init_cs); +} + +// Gets the traceback function to call to trace C calls. +void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) { + void (*ret)(struct cgoTracebackArg*); -// Sets the context function to call to record the traceback context -// when calling a Go function from C code. Called from runtime.SetCgoTraceback. 
-void x_cgo_set_context_function(void (*context)(struct context_arg*)) { EnterCriticalSection(&runtime_init_cs); - cgo_context_function = context; + ret = cgo_traceback_function; LeaveCriticalSection(&runtime_init_cs); + return ret; +} + +// Call the traceback function registered with x_cgo_set_traceback_functions. +// +// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is +// no C/C++ TSAN. +void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) { + void (*pfn)(struct cgoTracebackArg*); + + pfn = _cgo_get_traceback_function(); + if (pfn == nil) { + return; + } + + (*pfn)(arg); } -// Gets the context function. -void (*(_cgo_get_context_function(void)))(struct context_arg*) { - void (*ret)(struct context_arg*); +// Gets the context function to call to record the traceback context +// when calling a Go function from C code. +void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) { + void (*ret)(struct cgoContextArg*); EnterCriticalSection(&runtime_init_cs); ret = cgo_context_function; @@ -139,6 +174,31 @@ void (*(_cgo_get_context_function(void)))(struct context_arg*) { return ret; } +// Gets the symbolizer function to call to symbolize C frames. +void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) { + void (*ret)(struct cgoSymbolizerArg*); + + EnterCriticalSection(&runtime_init_cs); + ret = cgo_symbolizer_function; + LeaveCriticalSection(&runtime_init_cs); + return ret; +} + +// Call the symbolizer function registered with x_cgo_set_symbolizer_functions. +// +// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is +// no C/C++ TSAN. 
+void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) { + void (*pfn)(struct cgoSymbolizerArg*); + + pfn = _cgo_get_symbolizer_function(); + if (pfn == nil) { + return; + } + + (*pfn)(arg); +} + void _cgo_beginthread(unsigned long (__stdcall *func)(void*), void* arg) { int tries; HANDLE thandle; diff --git a/src/runtime/cgo/libcgo.h b/src/runtime/cgo/libcgo.h index 26da68fadb..aa0b57d6d7 100644 --- a/src/runtime/cgo/libcgo.h +++ b/src/runtime/cgo/libcgo.h @@ -89,15 +89,7 @@ void darwin_arm_init_thread_exception_port(void); void darwin_arm_init_mach_exception_handler(void); /* - * The cgo context function. See runtime.SetCgoTraceback. - */ -struct context_arg { - uintptr_t Context; -}; -extern void (*(_cgo_get_context_function(void)))(struct context_arg*); - -/* - * The argument for the cgo traceback callback. See runtime.SetCgoTraceback. + * The cgo traceback callback. See runtime.SetCgoTraceback. */ struct cgoTracebackArg { uintptr_t Context; @@ -105,6 +97,38 @@ struct cgoTracebackArg { uintptr_t* Buf; uintptr_t Max; }; +extern void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*); + +/* + * The cgo context callback. See runtime.SetCgoTraceback. + */ +struct cgoContextArg { + uintptr_t Context; +}; +extern void (*(_cgo_get_context_function(void)))(struct cgoContextArg*); + +/* + * The argument for the cgo symbolizer callback. See runtime.SetCgoTraceback. + */ +struct cgoSymbolizerArg { + uintptr_t PC; + const char* File; + uintptr_t Lineno; + const char* Func; + uintptr_t Entry; + uintptr_t More; + uintptr_t Data; +}; +extern void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*); + +/* + * The argument for x_cgo_set_traceback_functions. See runtime.SetCgoTraceback. + */ +struct cgoSetTracebackFunctionsArg { + void (*Traceback)(struct cgoTracebackArg*); + void (*Context)(struct cgoContextArg*); + void (*Symbolizer)(struct cgoSymbolizerArg*); +}; /* * TSAN support. 
This is only useful when building with @@ -121,11 +145,21 @@ struct cgoTracebackArg { #ifdef CGO_TSAN +// _cgo_tsan_acquire tells C/C++ TSAN that we are acquiring a dummy lock. We +// call this when calling from Go to C. This is necessary because TSAN cannot +// see the synchronization in Go. Note that C/C++ code built with TSAN is not +// the same as the Go race detector. +// +// cmd/cgo generates calls to _cgo_tsan_acquire and _cgo_tsan_release. For +// other cgo calls, manual calls are required. +// // These must match the definitions in yesTsanProlog in cmd/cgo/out.go. // In general we should call _cgo_tsan_acquire when we enter C code, // and call _cgo_tsan_release when we return to Go code. +// // This is only necessary when calling code that might be instrumented // by TSAN, which mostly means system library calls that TSAN intercepts. +// // See the comment in cmd/cgo/out.go for more details. long long _cgo_sync __attribute__ ((common)); diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go index 56f2a00d76..62ad8d1361 100644 --- a/src/runtime/symtab.go +++ b/src/runtime/symtab.go @@ -108,7 +108,7 @@ func (ci *Frames) Next() (frame Frame, more bool) { } funcInfo := findfunc(pc) if !funcInfo.valid() { - if cgoSymbolizer != nil { + if cgoSymbolizerAvailable() { // Pre-expand cgo frames. We could do this // incrementally, too, but there's no way to // avoid allocation in this case anyway. @@ -295,6 +295,8 @@ func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr { // expandCgoFrames expands frame information for pc, known to be // a non-Go function, using the cgoSymbolizer hook. expandCgoFrames // returns nil if pc could not be expanded. +// +// Preconditions: cgoSymbolizerAvailable returns true. 
func expandCgoFrames(pc uintptr) []Frame { arg := cgoSymbolizerArg{pc: pc} callCgoSymbolizer(&arg) diff --git a/src/runtime/testdata/testprog/setcgotraceback.go b/src/runtime/testdata/testprog/setcgotraceback.go new file mode 100644 index 0000000000..de005027ec --- /dev/null +++ b/src/runtime/testdata/testprog/setcgotraceback.go @@ -0,0 +1,45 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "internal/abi" + "runtime" + "unsafe" +) + +func init() { + register("SetCgoTracebackNoCgo", SetCgoTracebackNoCgo) +} + +func cgoTraceback() { + panic("unexpectedly reached cgo traceback function") +} + +func cgoContext() { + panic("unexpectedly reached cgo context function") +} + +func cgoSymbolizer() { + panic("unexpectedly reached cgo symbolizer function") +} + +// SetCgoTraceback is a no-op in non-cgo binaries. +func SetCgoTracebackNoCgo() { + traceback := unsafe.Pointer(abi.FuncPCABIInternal(cgoTraceback)) + context := unsafe.Pointer(abi.FuncPCABIInternal(cgoContext)) + symbolizer := unsafe.Pointer(abi.FuncPCABIInternal(cgoSymbolizer)) + runtime.SetCgoTraceback(0, traceback, context, symbolizer) + + // In a cgo binary, runtime.(*Frames).Next calls the cgo symbolizer for + // any non-Go frames. Pass in a bogus frame to verify that Next does + // not attempt to call the cgo symbolizer, which would crash in a + // non-cgo binary like this one. + frames := runtime.CallersFrames([]uintptr{0x12345678}) + frames.Next() + + fmt.Println("OK") +} diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 00eac59201..949d48c79a 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -591,7 +591,7 @@ func (u *unwinder) symPC() uintptr { // If the current frame is not a cgo frame or if there's no registered cgo // unwinder, it returns 0. 
func (u *unwinder) cgoCallers(pcBuf []uintptr) int { - if cgoTraceback == nil || u.frame.fn.funcID != abi.FuncID_cgocallback || u.cgoCtxt < 0 { + if !cgoTracebackAvailable() || u.frame.fn.funcID != abi.FuncID_cgocallback || u.cgoCtxt < 0 { // We don't have a cgo unwinder (typical case), or we do but we're not // in a cgo frame or we're out of cgo context. return 0 @@ -1014,7 +1014,7 @@ func traceback2(u *unwinder, showRuntime bool, skip, max int) (n, lastN int) { anySymbolized := false stop := false for _, pc := range cgoBuf[:cgoN] { - if cgoSymbolizer == nil { + if !cgoSymbolizerAvailable() { if pr, stop := commitFrame(); stop { break } else if pr { @@ -1573,10 +1573,18 @@ func SetCgoTraceback(version int, traceback, context, symbolizer unsafe.Pointer) cgoContext = context cgoSymbolizer = symbolizer - // The context function is called when a C function calls a Go - // function. As such it is only called by C code in runtime/cgo. - if _cgo_set_context_function != nil { - cgocall(_cgo_set_context_function, context) + if _cgo_set_traceback_functions != nil { + type cgoSetTracebackFunctionsArg struct { + traceback unsafe.Pointer + context unsafe.Pointer + symbolizer unsafe.Pointer + } + arg := cgoSetTracebackFunctionsArg{ + traceback: traceback, + context: context, + symbolizer: symbolizer, + } + cgocall(_cgo_set_traceback_functions, noescape(unsafe.Pointer(&arg))) } } @@ -1584,6 +1592,18 @@ var cgoTraceback unsafe.Pointer var cgoContext unsafe.Pointer var cgoSymbolizer unsafe.Pointer +func cgoTracebackAvailable() bool { + // - The traceback function must be registered via SetCgoTraceback. + // - This must be a cgo binary (providing _cgo_call_traceback_function). + return cgoTraceback != nil && _cgo_call_traceback_function != nil +} + +func cgoSymbolizerAvailable() bool { + // - The symbolizer function must be registered via SetCgoTraceback. + // - This must be a cgo binary (providing _cgo_call_symbolizer_function). 
+ return cgoSymbolizer != nil && _cgo_call_symbolizer_function != nil +} + // cgoTracebackArg is the type passed to cgoTraceback. type cgoTracebackArg struct { context uintptr @@ -1610,7 +1630,7 @@ type cgoSymbolizerArg struct { // printCgoTraceback prints a traceback of callers. func printCgoTraceback(callers *cgoCallers) { - if cgoSymbolizer == nil { + if !cgoSymbolizerAvailable() { for _, c := range callers { if c == 0 { break @@ -1635,6 +1655,8 @@ func printCgoTraceback(callers *cgoCallers) { // printOneCgoTraceback prints the traceback of a single cgo caller. // This can print more than one line because of inlining. // It returns the "stop" result of commitFrame. +// +// Preconditions: cgoSymbolizerAvailable returns true. func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *cgoSymbolizerArg) bool { arg.pc = pc for { @@ -1665,6 +1687,8 @@ func printOneCgoTraceback(pc uintptr, commitFrame func() (pr, stop bool), arg *c } // callCgoSymbolizer calls the cgoSymbolizer function. +// +// Preconditions: cgoSymbolizerAvailable returns true. func callCgoSymbolizer(arg *cgoSymbolizerArg) { call := cgocall if panicking.Load() > 0 || getg().m.curg != getg() { @@ -1678,14 +1702,13 @@ func callCgoSymbolizer(arg *cgoSymbolizerArg) { if asanenabled { asanwrite(unsafe.Pointer(arg), unsafe.Sizeof(cgoSymbolizerArg{})) } - call(cgoSymbolizer, noescape(unsafe.Pointer(arg))) + call(_cgo_call_symbolizer_function, noescape(unsafe.Pointer(arg))) } // cgoContextPCs gets the PC values from a cgo traceback. +// +// Preconditions: cgoTracebackAvailable returns true. 
func cgoContextPCs(ctxt uintptr, buf []uintptr) { - if cgoTraceback == nil { - return - } call := cgocall if panicking.Load() > 0 || getg().m.curg != getg() { // We do not want to call into the scheduler when panicking @@ -1703,5 +1726,5 @@ func cgoContextPCs(ctxt uintptr, buf []uintptr) { if asanenabled { asanwrite(unsafe.Pointer(&arg), unsafe.Sizeof(arg)) } - call(cgoTraceback, noescape(unsafe.Pointer(&arg))) + call(_cgo_call_traceback_function, noescape(unsafe.Pointer(&arg))) } diff --git a/src/runtime/traceback_test.go b/src/runtime/traceback_test.go index 8cbccac673..1dac91311c 100644 --- a/src/runtime/traceback_test.go +++ b/src/runtime/traceback_test.go @@ -8,6 +8,9 @@ import ( "bytes" "fmt" "internal/abi" + "internal/asan" + "internal/msan" + "internal/race" "internal/testenv" "regexp" "runtime" @@ -867,3 +870,15 @@ func TestTracebackGeneric(t *testing.T) { } } } + +func TestSetCgoTracebackNoCgo(t *testing.T) { + if asan.Enabled || msan.Enabled || race.Enabled { + t.Skip("skipped test: sanitizer builds use cgo") + } + + output := runTestProg(t, "testprog", "SetCgoTracebackNoCgo") + want := "OK\n" + if output != want { + t.Fatalf("want %s, got %s\n", want, output) + } +} -- cgit v1.3-5-g9baa