aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/_mkmalloc/mkmalloc.go3
-rw-r--r--src/runtime/arena_test.go15
-rw-r--r--src/runtime/asm_amd64.s15
-rw-r--r--src/runtime/asm_arm64.s15
-rw-r--r--src/runtime/asm_riscv64.s10
-rw-r--r--src/runtime/crash_test.go9
-rw-r--r--src/runtime/debuglog.go54
-rw-r--r--src/runtime/export_test.go76
-rw-r--r--src/runtime/hexdump.go269
-rw-r--r--src/runtime/hexdump_test.go151
-rw-r--r--src/runtime/malloc.go336
-rw-r--r--src/runtime/malloc_generated.go651
-rw-r--r--src/runtime/malloc_stubs.go22
-rw-r--r--src/runtime/malloc_test.go375
-rw-r--r--src/runtime/mcache.go52
-rw-r--r--src/runtime/mcleanup.go86
-rw-r--r--src/runtime/mcleanup_test.go28
-rw-r--r--src/runtime/mgc.go8
-rw-r--r--src/runtime/mgcmark.go21
-rw-r--r--src/runtime/mgcmark_greenteagc.go13
-rw-r--r--src/runtime/mgcpacer.go91
-rw-r--r--src/runtime/mgcsweep.go2
-rw-r--r--src/runtime/mheap.go35
-rw-r--r--src/runtime/panic.go2
-rw-r--r--src/runtime/print.go41
-rw-r--r--src/runtime/proc.go155
-rw-r--r--src/runtime/proc_test.go50
-rw-r--r--src/runtime/runtime2.go16
-rw-r--r--src/runtime/slice.go104
-rw-r--r--src/runtime/slice_test.go319
-rw-r--r--src/runtime/sys_riscv64.go11
-rw-r--r--src/runtime/testdata/testprog/crash.go11
-rw-r--r--src/runtime/testdata/testprog/gc.go2
-rw-r--r--src/runtime/testdata/testprog/stw_trace.go111
-rw-r--r--src/runtime/traceback.go21
35 files changed, 3004 insertions, 176 deletions
diff --git a/src/runtime/_mkmalloc/mkmalloc.go b/src/runtime/_mkmalloc/mkmalloc.go
index 986b0aa9f8..1f040c8861 100644
--- a/src/runtime/_mkmalloc/mkmalloc.go
+++ b/src/runtime/_mkmalloc/mkmalloc.go
@@ -254,7 +254,8 @@ func inline(config generatorConfig) []byte {
}
// Write out the package and import declarations.
- out.WriteString("// Code generated by mkmalloc.go; DO NOT EDIT.\n\n")
+ out.WriteString("// Code generated by mkmalloc.go; DO NOT EDIT.\n")
+ out.WriteString("// See overview in malloc_stubs.go.\n\n")
out.WriteString("package " + f.Name.Name + "\n\n")
for _, importDecl := range importDecls {
out.Write(mustFormatNode(fset, importDecl))
diff --git a/src/runtime/arena_test.go b/src/runtime/arena_test.go
index ca5223b59c..0bb1950464 100644
--- a/src/runtime/arena_test.go
+++ b/src/runtime/arena_test.go
@@ -36,6 +36,11 @@ type largeScalar [UserArenaChunkBytes + 1]byte
type largePointer [UserArenaChunkBytes/unsafe.Sizeof(&smallPointer{}) + 1]*smallPointer
func TestUserArena(t *testing.T) {
+ if Clobberfree() {
+ // This test crashes with SEGV in clobberfree in mgcsweep.go with GODEBUG=clobberfree=1.
+ t.Skip("triggers SEGV with GODEBUG=clobberfree=1")
+ }
+
// Set GOMAXPROCS to 2 so we don't run too many of these
// tests in parallel.
defer GOMAXPROCS(GOMAXPROCS(2))
@@ -228,6 +233,11 @@ func runSubTestUserArenaSlice[S comparable](t *testing.T, value []S, parallel bo
}
func TestUserArenaLiveness(t *testing.T) {
+ if Clobberfree() {
+ // This test crashes with SEGV in clobberfree in mgcsweep.go with GODEBUG=clobberfree=1.
+ t.Skip("triggers SEGV with GODEBUG=clobberfree=1")
+ }
+
t.Run("Free", func(t *testing.T) {
testUserArenaLiveness(t, false)
})
@@ -320,6 +330,11 @@ func testUserArenaLiveness(t *testing.T, useArenaFinalizer bool) {
}
func TestUserArenaClearsPointerBits(t *testing.T) {
+ if Clobberfree() {
+ // This test crashes with SEGV in clobberfree in mgcsweep.go with GODEBUG=clobberfree=1.
+ t.Skip("triggers SEGV with GODEBUG=clobberfree=1")
+ }
+
// This is a regression test for a serious issue wherein if pointer bits
// aren't properly cleared, it's possible to allocate scalar data down
// into a previously pointer-ful area, causing misinterpretation by the GC.
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index ea85146936..7c746803a8 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -181,6 +181,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
MOVQ AX, 24(SP)
MOVQ BX, 32(SP)
+ // This is typically the entry point for Go programs.
+ // Call stack unwinding must not proceed past this frame.
+ // Set the frame pointer register to 0 so that frame pointer-based unwinders
+ // (which don't use debug info for performance reasons)
+ // won't attempt to unwind past this function.
+ // See go.dev/issue/63630
+ MOVQ $0, BP
+
// create istack out of the given (operating system) stack.
// _cgo_init may update stackguard.
MOVQ $runtime·g0(SB), DI
@@ -408,6 +416,13 @@ TEXT runtime·asminit(SB),NOSPLIT,$0-0
RET
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
+ // This is the root frame of new Go-created OS threads.
+ // Call stack unwinding must not proceed past this frame.
+ // Set the frame pointer register to 0 so that frame pointer-based unwinders
+ // (which don't use debug info for performance reasons)
+ // won't attempt to unwind past this function.
+ // See go.dev/issue/63630
+ MOVQ $0, BP
CALL runtime·mstart0(SB)
RET // not reached
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 902a7066aa..01f2690f4e 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -109,6 +109,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
MOVW R0, 8(RSP) // argc
MOVD R1, 16(RSP) // argv
+ // This is typically the entry point for Go programs.
+ // Call stack unwinding must not proceed past this frame.
+ // Set the frame pointer register to 0 so that frame pointer-based unwinders
+ // (which don't use debug info for performance reasons)
+ // won't attempt to unwind past this function.
+ // See go.dev/issue/63630
+ MOVD $0, R29
+
#ifdef TLS_darwin
// Initialize TLS.
MOVD ZR, g // clear g, make sure it's not junk.
@@ -248,6 +256,13 @@ TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
RET
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
+ // This is the root frame of new Go-created OS threads.
+ // Call stack unwinding must not proceed past this frame.
+ // Set the frame pointer register to 0 so that frame pointer-based unwinders
+ // (which don't use debug info for performance reasons)
+ // won't attempt to unwind past this function.
+ // See go.dev/issue/63630
+ MOVD $0, R29
BL runtime·mstart0(SB)
RET // not reached
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
index 5bd16181ee..428701a503 100644
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@@ -623,14 +623,14 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$8
RET
// func goexit(neverCallThisFunction)
-// The top-most function running on a goroutine
-// returns to goexit+PCQuantum.
+// The top-most function running on a goroutine returns to goexit+PCQuantum*2.
+// Note that the NOPs are written in a manner that will not be compressed,
+// since the offset must be known by the runtime.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
- MOV ZERO, ZERO // NOP
+ WORD $0x00000013 // NOP
JMP runtime·goexit1(SB) // does not return
// traceback from goexit1 must hit code range of goexit
- MOV ZERO, ZERO // NOP
-
+ WORD $0x00000013 // NOP
// This is called from .init_array and follows the platform, not the Go ABI.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
diff --git a/src/runtime/crash_test.go b/src/runtime/crash_test.go
index 2b8ca549ad..00e67aeca0 100644
--- a/src/runtime/crash_test.go
+++ b/src/runtime/crash_test.go
@@ -413,6 +413,15 @@ func TestRepanickedPanicSandwich(t *testing.T) {
}
}
+func TestDoublePanicWithSameValue(t *testing.T) {
+ output := runTestProg(t, "testprog", "DoublePanicWithSameValue")
+ want := `panic: message
+`
+ if !strings.HasPrefix(output, want) {
+ t.Fatalf("output does not start with %q:\n%s", want, output)
+ }
+}
+
func TestGoexitCrash(t *testing.T) {
// External linking brings in cgo, causing deadlock detection not working.
testenv.MustInternalLink(t, deadlockBuildTypes)
diff --git a/src/runtime/debuglog.go b/src/runtime/debuglog.go
index e993e396c1..405f2455c6 100644
--- a/src/runtime/debuglog.go
+++ b/src/runtime/debuglog.go
@@ -196,7 +196,8 @@ const (
debugLogPtr
debugLogString
debugLogConstString
- debugLogStringOverflow
+ debugLogHexdump
+ debugLogOverflow
debugLogPC
debugLogTraceback
@@ -365,7 +366,7 @@ func (l *dloggerImpl) s(x string) *dloggerImpl {
l.w.uvarint(uint64(len(b)))
l.w.bytes(b)
if len(b) != len(x) {
- l.w.byte(debugLogStringOverflow)
+ l.w.byte(debugLogOverflow)
l.w.uvarint(uint64(len(x) - len(b)))
}
}
@@ -373,6 +374,32 @@ func (l *dloggerImpl) s(x string) *dloggerImpl {
}
//go:nosplit
+func (l dloggerFake) hexdump(p unsafe.Pointer, bytes uintptr) dloggerFake { return l }
+
+//go:nosplit
+func (l *dloggerImpl) hexdump(p unsafe.Pointer, bytes uintptr) *dloggerImpl {
+ var b []byte
+ bb := (*slice)(unsafe.Pointer(&b))
+ bb.array = unsafe.Pointer(p)
+ bb.len, bb.cap = int(bytes), int(bytes)
+ if len(b) > debugLogStringLimit {
+ b = b[:debugLogStringLimit]
+ }
+
+ l.w.byte(debugLogHexdump)
+ l.w.uvarint(uint64(uintptr(p)))
+ l.w.uvarint(uint64(len(b)))
+ l.w.bytes(b)
+
+ if uintptr(len(b)) != bytes {
+ l.w.byte(debugLogOverflow)
+ l.w.uvarint(uint64(bytes) - uint64(len(b)))
+ }
+
+ return l
+}
+
+//go:nosplit
func (l dloggerFake) pc(x uintptr) dloggerFake { return l }
//go:nosplit
@@ -708,9 +735,30 @@ func (r *debugLogReader) printVal() bool {
s := *(*string)(unsafe.Pointer(&str))
print(s)
- case debugLogStringOverflow:
+ case debugLogOverflow:
print("..(", r.uvarint(), " more bytes)..")
+ case debugLogHexdump:
+ p := uintptr(r.uvarint())
+ bl := r.uvarint()
+ if r.begin+bl > r.end {
+ r.begin = r.end
+ print("<hexdump length corrupted>")
+ break
+ }
+ println() // Start on a new line
+ hd := hexdumper{addr: p}
+ for bl > 0 {
+ b := r.data.b[r.begin%uint64(len(r.data.b)):]
+ if uint64(len(b)) > bl {
+ b = b[:bl]
+ }
+ r.begin += uint64(len(b))
+ bl -= uint64(len(b))
+ hd.write(b)
+ }
+ hd.close()
+
case debugLogPC:
printDebugLogPC(uintptr(r.uvarint()), false)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 3a781b7551..6e0360aaca 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -238,6 +238,12 @@ func SetEnvs(e []string) { envs = e }
const PtrSize = goarch.PtrSize
+const ClobberdeadPtr = clobberdeadPtr
+
+func Clobberfree() bool {
+ return debug.clobberfree != 0
+}
+
var ForceGCPeriod = &forcegcperiod
// SetTracebackEnv is like runtime/debug.SetTraceback, but it raises
@@ -633,6 +639,34 @@ func RunGetgThreadSwitchTest() {
}
}
+// Expose freegc for testing.
+func Freegc(p unsafe.Pointer, size uintptr, noscan bool) {
+ freegc(p, size, noscan)
+}
+
+// Expose gcAssistBytes for the current g for testing.
+func AssistCredit() int64 {
+ assistG := getg()
+ if assistG.m.curg != nil {
+ assistG = assistG.m.curg
+ }
+ return assistG.gcAssistBytes
+}
+
+// Expose gcBlackenEnabled for testing.
+func GcBlackenEnable() bool {
+ // Note we do a non-atomic load here.
+ // Some checks against gcBlackenEnabled (e.g., in mallocgc)
+ // are currently done via non-atomic load for performance reasons,
+ // but other checks are done via atomic load (e.g., in mgcmark.go),
+ // so interpreting this value in a test may be subtle.
+ return gcBlackenEnabled != 0
+}
+
+const SizeSpecializedMallocEnabled = sizeSpecializedMallocEnabled
+
+const RuntimeFreegcEnabled = runtimeFreegcEnabled
+
const (
PageSize = pageSize
PallocChunkPages = pallocChunkPages
@@ -1472,6 +1506,15 @@ func Releasem() {
releasem(getg().m)
}
+// GoschedIfBusy is an explicit preemption check to call back
+// into the scheduler. This is useful for tests that run code
+// which spend most of their time as non-preemptible, as it
+// can be placed right after becoming preemptible again to ensure
+// that the scheduler gets a chance to preempt the goroutine.
+func GoschedIfBusy() {
+ goschedIfBusy()
+}
+
type PIController struct {
piController
}
@@ -1988,3 +2031,36 @@ func (head *ListHeadManual) Pop() unsafe.Pointer {
func (head *ListHeadManual) Remove(p unsafe.Pointer) {
head.l.remove(p)
}
+
+func Hexdumper(base uintptr, wordBytes int, mark func(addr uintptr, start func()), data ...[]byte) string {
+ buf := make([]byte, 0, 2048)
+ getg().writebuf = buf
+ h := hexdumper{addr: base, addrBytes: 4, wordBytes: uint8(wordBytes)}
+ if mark != nil {
+ h.mark = func(addr uintptr, m hexdumpMarker) {
+ mark(addr, m.start)
+ }
+ }
+ for _, d := range data {
+ h.write(d)
+ }
+ h.close()
+ n := len(getg().writebuf)
+ getg().writebuf = nil
+ if n == cap(buf) {
+ panic("Hexdumper buf too small")
+ }
+ return string(buf[:n])
+}
+
+func HexdumpWords(p, bytes uintptr) string {
+ buf := make([]byte, 0, 2048)
+ getg().writebuf = buf
+ hexdumpWords(p, bytes, nil)
+ n := len(getg().writebuf)
+ getg().writebuf = nil
+ if n == cap(buf) {
+ panic("HexdumpWords buf too small")
+ }
+ return string(buf[:n])
+}
diff --git a/src/runtime/hexdump.go b/src/runtime/hexdump.go
new file mode 100644
index 0000000000..0d7dbb540b
--- /dev/null
+++ b/src/runtime/hexdump.go
@@ -0,0 +1,269 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/goarch"
+ "unsafe"
+)
+
+// hexdumpWords prints a word-oriented hex dump of [p, p+len).
+//
+// If mark != nil, it will be passed to hexdumper.mark.
+func hexdumpWords(p, len uintptr, mark func(uintptr, hexdumpMarker)) {
+ printlock()
+
+ // Provide a default annotation
+ symMark := func(u uintptr, hm hexdumpMarker) {
+ if mark != nil {
+ mark(u, hm)
+ }
+
+ // Can we symbolize this value?
+ val := *(*uintptr)(unsafe.Pointer(u))
+ fn := findfunc(val)
+ if fn.valid() {
+ hm.start()
+ print("<", funcname(fn), "+", hex(val-fn.entry()), ">\n")
+ }
+ }
+
+ h := hexdumper{addr: p, mark: symMark}
+ h.write(unsafe.Slice((*byte)(unsafe.Pointer(p)), len))
+ h.close()
+ printunlock()
+}
+
+// hexdumper is a Swiss-army knife hex dumper.
+//
+// To use, optionally set addr and wordBytes, then call write repeatedly,
+// followed by close.
+type hexdumper struct {
+ // addr is the address to print for the first byte of data.
+ addr uintptr
+
+ // addrBytes is the number of bytes of addr to print. If this is 0, it
+ // defaults to goarch.PtrSize.
+ addrBytes uint8
+
+ // wordBytes is the number of bytes in a word. If wordBytes is 1, this
+ // prints a byte-oriented dump. If it's > 1, this interprets the data as a
+ // sequence of words of the given size. If it's 0, it's treated as
+ // goarch.PtrSize.
+ wordBytes uint8
+
+ // mark is an optional function that can annotate values in the hex dump.
+ //
+ // If non-nil, it is called with the address of every complete, aligned word
+ // in the hex dump.
+ //
+ // If it decides to print an annotation, it must first call m.start(), then
+ // print the annotation, followed by a new line.
+ mark func(addr uintptr, m hexdumpMarker)
+
+ // Below here is state
+
+ ready int8 // 0=need to init state; 1=need to print header; 2=ready
+
+ // dataBuf accumulates a line at a time of data, in case it's split across
+ // buffers.
+ dataBuf [16]byte
+ dataPos uint8
+ dataSkip uint8 // Skip first n bytes of buf on first line
+
+ // toPos maps from byte offset in data to a visual offset in the printed line.
+ toPos [16]byte
+}
+
+type hexdumpMarker struct {
+ chars int
+}
+
+func (h *hexdumper) write(data []byte) {
+ if h.ready == 0 {
+ h.init()
+ }
+
+ // Handle leading data
+ if h.dataPos > 0 {
+ n := copy(h.dataBuf[h.dataPos:], data)
+ h.dataPos += uint8(n)
+ data = data[n:]
+ if h.dataPos < uint8(len(h.dataBuf)) {
+ return
+ }
+ h.flushLine(h.dataBuf[:])
+ h.dataPos = 0
+ }
+
+ // Handle full lines in data
+ for len(data) >= len(h.dataBuf) {
+ h.flushLine(data[:len(h.dataBuf)])
+ data = data[len(h.dataBuf):]
+ }
+
+ // Handle trailing data
+ h.dataPos = uint8(copy(h.dataBuf[:], data))
+}
+
+func (h *hexdumper) close() {
+ if h.dataPos > 0 {
+ h.flushLine(h.dataBuf[:h.dataPos])
+ }
+}
+
+func (h *hexdumper) init() {
+ const bytesPerLine = len(h.dataBuf)
+
+ if h.addrBytes == 0 {
+ h.addrBytes = goarch.PtrSize
+ } else if h.addrBytes < 0 || h.addrBytes > goarch.PtrSize {
+ throw("invalid addrBytes")
+ }
+
+ if h.wordBytes == 0 {
+ h.wordBytes = goarch.PtrSize
+ }
+ wb := int(h.wordBytes)
+ if wb < 0 || wb >= bytesPerLine || wb&(wb-1) != 0 {
+ throw("invalid wordBytes")
+ }
+
+ // Construct position mapping.
+ for i := range h.toPos {
+ // First, calculate the "field" within the line, applying byte swizzling.
+ field := 0
+ if goarch.BigEndian {
+ field = i
+ } else {
+ field = i ^ int(wb-1)
+ }
+ // Translate this field into a visual offset.
+ // "00112233 44556677 8899AABB CCDDEEFF"
+ h.toPos[i] = byte(field*2 + field/4 + field/8)
+ }
+
+ // The first line may need to skip some fields to get to alignment.
+ // Round down the starting address.
+ nAddr := h.addr &^ uintptr(bytesPerLine-1)
+ // Skip bytes to get to alignment.
+ h.dataPos = uint8(h.addr - nAddr)
+ h.dataSkip = uint8(h.addr - nAddr)
+ h.addr = nAddr
+
+ // We're ready to print the header.
+ h.ready = 1
+}
+
+func (h *hexdumper) flushLine(data []byte) {
+ const bytesPerLine = len(h.dataBuf)
+
+ const maxAddrChars = 2 * goarch.PtrSize
+ const addrSep = ": "
+ dataStart := int(2*h.addrBytes) + len(addrSep)
+ // dataChars uses the same formula to toPos above. We calculate it with the
+ // "last field", then add the size of the last field.
+ const dataChars = (bytesPerLine-1)*2 + (bytesPerLine-1)/4 + (bytesPerLine-1)/8 + 2
+ const asciiSep = " "
+ asciiStart := dataStart + dataChars + len(asciiSep)
+ const asciiChars = bytesPerLine
+ nlPos := asciiStart + asciiChars
+
+ var lineBuf [maxAddrChars + len(addrSep) + dataChars + len(asciiSep) + asciiChars + 1]byte
+ clear := func() {
+ for i := range lineBuf {
+ lineBuf[i] = ' '
+ }
+ }
+ clear()
+
+ if h.ready == 1 {
+ // Print column offsets header.
+ for offset, pos := range h.toPos {
+ h.fmtHex(lineBuf[dataStart+int(pos+1):][:1], uint64(offset))
+ }
+ // Print ASCII offsets.
+ for offset := range asciiChars {
+ h.fmtHex(lineBuf[asciiStart+offset:][:1], uint64(offset))
+ }
+ lineBuf[nlPos] = '\n'
+ gwrite(lineBuf[:nlPos+1])
+ clear()
+ h.ready = 2
+ }
+
+ // Format address.
+ h.fmtHex(lineBuf[:2*h.addrBytes], uint64(h.addr))
+ copy(lineBuf[2*h.addrBytes:], addrSep)
+ // Format data in hex and ASCII.
+ for offset, b := range data {
+ if offset < int(h.dataSkip) {
+ continue
+ }
+
+ pos := h.toPos[offset]
+ h.fmtHex(lineBuf[dataStart+int(pos):][:2], uint64(b))
+
+ copy(lineBuf[dataStart+dataChars:], asciiSep)
+ ascii := uint8('.')
+ if b >= ' ' && b <= '~' {
+ ascii = b
+ }
+ lineBuf[asciiStart+offset] = ascii
+ }
+ // Trim buffer.
+ end := asciiStart + len(data)
+ lineBuf[end] = '\n'
+ buf := lineBuf[:end+1]
+
+ // Print.
+ gwrite(buf)
+
+ // Print marks.
+ if h.mark != nil {
+ clear()
+ for offset := 0; offset+int(h.wordBytes) <= len(data); offset += int(h.wordBytes) {
+ if offset < int(h.dataSkip) {
+ continue
+ }
+ addr := h.addr + uintptr(offset)
+ // Find the position of the left edge of this word
+ caret := dataStart + int(min(h.toPos[offset], h.toPos[offset+int(h.wordBytes)-1]))
+ h.mark(addr, hexdumpMarker{caret})
+ }
+ }
+
+ h.addr += uintptr(bytesPerLine)
+ h.dataPos = 0
+ h.dataSkip = 0
+}
+
+// fmtHex formats v in base 16 into buf. It fills all of buf. If buf is too
+// small to represent v, the output will start with '*'.
+func (h *hexdumper) fmtHex(buf []byte, v uint64) {
+ const dig = "0123456789abcdef"
+ i := len(buf) - 1
+ for ; i >= 0; i-- {
+ buf[i] = dig[v%16]
+ v /= 16
+ }
+ if v != 0 {
+ // Indicate that we couldn't fit the whole number.
+ buf[0] = '*'
+ }
+}
+
+func (m hexdumpMarker) start() {
+ var spaces [64]byte
+ for i := range spaces {
+ spaces[i] = ' '
+ }
+ for m.chars > len(spaces) {
+ gwrite(spaces[:])
+ m.chars -= len(spaces)
+ }
+ gwrite(spaces[:m.chars])
+ print("^ ")
+}
diff --git a/src/runtime/hexdump_test.go b/src/runtime/hexdump_test.go
new file mode 100644
index 0000000000..cc44e48e4b
--- /dev/null
+++ b/src/runtime/hexdump_test.go
@@ -0,0 +1,151 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+ "fmt"
+ "internal/abi"
+ "internal/goarch"
+ "runtime"
+ "slices"
+ "strings"
+ "testing"
+ "unsafe"
+)
+
+func TestHexdumper(t *testing.T) {
+ check := func(label, got, want string) {
+ got = strings.TrimRight(got, "\n")
+ want = strings.TrimPrefix(want, "\n")
+ want = strings.TrimRight(want, "\n")
+ if got != want {
+ t.Errorf("%s: got\n%s\nwant\n%s", label, got, want)
+ }
+ }
+
+ data := make([]byte, 32)
+ for i := range data {
+ data[i] = 0x10 + byte(i)
+ }
+
+ check("basic", runtime.Hexdumper(0, 1, nil, data), `
+ 0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
+00000000: 10111213 14151617 18191a1b 1c1d1e1f ................
+00000010: 20212223 24252627 28292a2b 2c2d2e2f !"#$%&'()*+,-./`)
+
+ if !goarch.BigEndian {
+ // Different word sizes
+ check("word=4", runtime.Hexdumper(0, 4, nil, data), `
+ 3 2 1 0 7 6 5 4 b a 9 8 f e d c 0123456789abcdef
+00000000: 13121110 17161514 1b1a1918 1f1e1d1c ................
+00000010: 23222120 27262524 2b2a2928 2f2e2d2c !"#$%&'()*+,-./`)
+ check("word=8", runtime.Hexdumper(0, 8, nil, data), `
+ 7 6 5 4 3 2 1 0 f e d c b a 9 8 0123456789abcdef
+00000000: 17161514 13121110 1f1e1d1c 1b1a1918 ................
+00000010: 27262524 23222120 2f2e2d2c 2b2a2928 !"#$%&'()*+,-./`)
+ }
+
+ // Starting offset
+ check("offset=1", runtime.Hexdumper(1, 1, nil, data), `
+ 0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
+00000000: 101112 13141516 1718191a 1b1c1d1e ...............
+00000010: 1f202122 23242526 2728292a 2b2c2d2e . !"#$%&'()*+,-.
+00000020: 2f /`)
+ if !goarch.BigEndian {
+ // ... combined with a word size
+ check("offset=1 and word=4", runtime.Hexdumper(1, 4, nil, data), `
+ 3 2 1 0 7 6 5 4 b a 9 8 f e d c 0123456789abcdef
+00000000: 121110 16151413 1a191817 1e1d1c1b ...............
+00000010: 2221201f 26252423 2a292827 2e2d2c2b . !"#$%&'()*+,-.
+00000020: 2f /`)
+ }
+
+ // Partial data full of annoying boundaries.
+ partials := make([][]byte, 0)
+ for i := 0; i < len(data); i += 2 {
+ partials = append(partials, data[i:i+2])
+ }
+ check("partials", runtime.Hexdumper(1, 1, nil, partials...), `
+ 0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
+00000000: 101112 13141516 1718191a 1b1c1d1e ...............
+00000010: 1f202122 23242526 2728292a 2b2c2d2e . !"#$%&'()*+,-.
+00000020: 2f /`)
+
+ // Marks.
+ check("marks", runtime.Hexdumper(0, 1, func(addr uintptr, start func()) {
+ if addr%7 == 0 {
+ start()
+ println("mark")
+ }
+ }, data), `
+ 0 1 2 3 4 5 6 7 8 9 a b c d e f 0123456789abcdef
+00000000: 10111213 14151617 18191a1b 1c1d1e1f ................
+ ^ mark
+ ^ mark
+ ^ mark
+00000010: 20212223 24252627 28292a2b 2c2d2e2f !"#$%&'()*+,-./
+ ^ mark
+ ^ mark`)
+ if !goarch.BigEndian {
+ check("marks and word=4", runtime.Hexdumper(0, 4, func(addr uintptr, start func()) {
+ if addr%7 == 0 {
+ start()
+ println("mark")
+ }
+ }, data), `
+ 3 2 1 0 7 6 5 4 b a 9 8 f e d c 0123456789abcdef
+00000000: 13121110 17161514 1b1a1918 1f1e1d1c ................
+ ^ mark
+00000010: 23222120 27262524 2b2a2928 2f2e2d2c !"#$%&'()*+,-./
+ ^ mark`)
+ }
+}
+
+func TestHexdumpWords(t *testing.T) {
+ if goarch.BigEndian || goarch.PtrSize != 8 {
+ // We could support these, but it's kind of a pain.
+ t.Skip("requires 64-bit little endian")
+ }
+
+ // Most of this is in hexdumper. Here we just test the symbolizer.
+
+ pc := abi.FuncPCABIInternal(TestHexdumpWords)
+ pcs := slices.Repeat([]uintptr{pc}, 3)
+
+ // Make sure pcs doesn't move around on us.
+ var p runtime.Pinner
+ defer p.Unpin()
+ p.Pin(&pcs[0])
+ // Get a 16 byte, 16-byte-aligned chunk of pcs so the hexdump is simple.
+ start := uintptr(unsafe.Pointer(&pcs[0]))
+ start = (start + 15) &^ uintptr(15)
+
+ // Do the hex dump.
+ got := runtime.HexdumpWords(start, 16)
+
+ // Construct the expected output.
+ pcStr := fmt.Sprintf("%016x", pc)
+ pcStr = pcStr[:8] + " " + pcStr[8:] // Add middle space
+ ascii := make([]byte, 8)
+ for i := range ascii {
+ b := byte(pc >> (8 * i))
+ if b >= ' ' && b <= '~' {
+ ascii[i] = b
+ } else {
+ ascii[i] = '.'
+ }
+ }
+ want := fmt.Sprintf(`
+ 7 6 5 4 3 2 1 0 f e d c b a 9 8 0123456789abcdef
+%016x: %s %s %s%s
+ ^ <runtime_test.TestHexdumpWords+0x0>
+ ^ <runtime_test.TestHexdumpWords+0x0>
+`, start, pcStr, pcStr, ascii, ascii)
+ want = strings.TrimPrefix(want, "\n")
+
+ if got != want {
+ t.Errorf("got\n%s\nwant\n%s", got, want)
+ }
+}
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index fc4f21b532..d49dacaf68 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1080,7 +1080,8 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger
//
// We might consider turning these on by default; many of them previously were.
// They account for a few % of mallocgc's cost though, which does matter somewhat
-// at scale.
+// at scale. (When testing changes to malloc, consider enabling this, and also
+// some function-local 'doubleCheck' consts such as in mbitmap.go currently.)
const doubleCheckMalloc = false
// sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized
@@ -1089,6 +1090,14 @@ const doubleCheckMalloc = false
// properly on plan9, so size-specialized malloc is also disabled on plan9.
const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled
+// runtimeFreegcEnabled is the set of conditions where we enable the runtime.freegc
+// implementation and the corresponding allocation-related changes: the experiment must be
+// enabled, and none of the memory sanitizers should be enabled. We allow the race detector,
+// in contrast to sizeSpecializedMallocEnabled.
+// TODO(thepudds): it would be nice to check Valgrind integration, though there are some hints
+// there might not be any canned tests in tree for Go's integration with Valgrind.
+const runtimeFreegcEnabled = goexperiment.RuntimeFreegc && !asanenabled && !msanenabled && !valgrindenabled
+
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
@@ -1150,7 +1159,8 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
size += asanRZ
}
- // Assist the GC if needed.
+ // Assist the GC if needed. (On the reuse path, we currently compensate for this;
+ // changes here might require changes there.)
if gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
@@ -1413,6 +1423,16 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
size = uintptr(gc.SizeClassToSize[sizeclass])
spc := makeSpanClass(sizeclass, true)
span := c.alloc[spc]
+
+ // First, check for a reusable object.
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+ // We have a reusable object, use it.
+ x := mallocgcSmallNoscanReuse(c, span, spc, size, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ return x, size
+ }
+
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
@@ -1472,6 +1492,55 @@ func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointe
return x, size
}
+// mallocgcSmallNoscanReuse returns a previously freed noscan object after preparing it for reuse.
+// It must only be called if hasReusableNoscan returned true.
+func mallocgcSmallNoscanReuse(c *mcache, span *mspan, spc spanClass, size uintptr, needzero bool) unsafe.Pointer {
+ // TODO(thepudds): could nextFreeFast, nextFree and nextReusable return unsafe.Pointer?
+ // Maybe doesn't matter. gclinkptr might be for historical reasons.
+ v, span := c.nextReusableNoScan(span, spc)
+ x := unsafe.Pointer(v)
+
+ // Compensate for the GC assist credit deducted in mallocgc (before calling us and
+ // after we return) because this is not a newly allocated object. We use the full slot
+ // size (elemsize) here because that's what mallocgc deducts overall. Note we only
+ // adjust this when gcBlackenEnabled is true, which follows mallocgc behavior.
+ // TODO(thepudds): a follow-up CL adds a more specific test of our assist credit
+ // handling, including for validating internal fragmentation handling.
+ if gcBlackenEnabled != 0 {
+ addAssistCredit(size)
+ }
+
+ // This is a previously used object, so only check needzero (and not span.needzero)
+ // for clearing.
+ if needzero {
+ memclrNoHeapPointers(x, size)
+ }
+
+ // See publicationBarrier comment in mallocgcSmallNoscan.
+ publicationBarrier()
+
+ // Finish and return. Note that we do not update span.freeIndexForScan, profiling info,
+ // nor do we check gcTrigger.
+ // TODO(thepudds): the current approach is viable for a GOEXPERIMENT, but
+ // means we do not profile reused heap objects. Ultimately, we will need a better
+ // approach for profiling, or at least ensure we are not introducing bias in the
+ // profiled allocations.
+ // TODO(thepudds): related, we probably want to adjust how allocs and frees are counted
+ // in the existing stats. Currently, reused objects are not counted as allocs nor
+ // frees, but instead roughly appear as if the original heap object lived on. We
+ // probably will also want some additional runtime/metrics, and generally think about
+ // user-facing observability & diagnostics, though all this likely can wait for an
+ // official proposal.
+ if writeBarrier.enabled {
+ // Allocate black during GC.
+ // All slots hold nil so no scanning is needed.
+ // This may be racing with GC so do it atomically if there can be
+ // a race marking the bit.
+ gcmarknewobject(span, uintptr(x))
+ }
+ return x
+}
+
func mallocgcSmallScanNoHeader(size uintptr, typ *_type) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
@@ -1816,8 +1885,6 @@ func postMallocgcDebug(x unsafe.Pointer, elemsize uintptr, typ *_type) {
// by size bytes, and assists the GC if necessary.
//
// Caller must be preemptible.
-//
-// Returns the G for which the assist credit was accounted.
func deductAssistCredit(size uintptr) {
// Charge the current user G for this allocation.
assistG := getg()
@@ -1836,6 +1903,267 @@ func deductAssistCredit(size uintptr) {
}
}
+// addAssistCredit is like deductAssistCredit,
+// but adds credit rather than removing it,
+// and never calls gcAssistAlloc.
+func addAssistCredit(size uintptr) {
+ // Credit the current user G.
+ assistG := getg()
+ if assistG.m.curg != nil { // TODO(thepudds): do we need to do this?
+ assistG = assistG.m.curg
+ }
+ // Credit the size against the G.
+ assistG.gcAssistBytes += int64(size)
+}
+
+const (
+ // doubleCheckReusable enables some additional invariant checks for the
+ // runtime.freegc and reusable objects. Note that some of these checks alter timing,
+ // and it is good to test changes with and without this enabled.
+ doubleCheckReusable = false
+
+ // debugReusableLog enables some printlns for runtime.freegc and reusable objects.
+ debugReusableLog = false
+)
+
+// freegc records that a heap object is reusable and available for
+// immediate reuse in a subsequent mallocgc allocation, without
+// needing to wait for the GC cycle to progress.
+//
+// The information is recorded in a free list stored in the
+// current P's mcache. The caller must pass in the user size
+// and whether the object has pointers, which allows a faster free
+// operation.
+//
+// freegc must be called by the effective owner of ptr who knows
+// the pointer is logically dead, with no possible aliases that might
+// be used past that moment. In other words, ptr must be the
+// last and only pointer to its referent.
+//
+// The intended caller is the compiler.
+//
+// Note: please do not send changes that attempt to add freegc calls
+// to the standard library.
+//
+// ptr must point to a heap object or into the current g's stack,
+// in which case freegc is a no-op. In particular, ptr must not point
+// to memory in the data or bss sections, which is partially enforced.
+// For objects with a malloc header, ptr should point mallocHeaderSize bytes
+// past the base; otherwise, ptr should point to the base of the heap object.
+// In other words, ptr should be the same pointer that was returned by mallocgc.
+//
+// In addition, the caller must know that ptr's object has no specials, such
+// as might have been created by a call to SetFinalizer or AddCleanup.
+// (Internally, the runtime deals appropriately with internally-created
+// specials, such as specials for memory profiling).
+//
+// If the size of ptr's object is less than 16 bytes or greater than
+// 32KiB - gc.MallocHeaderSize bytes, freegc is currently a no-op. It must only
+// be called in alloc-safe places. It currently throws if noscan is false
+// (support for which is implemented in a later CL in our stack).
+//
+// Note that freegc accepts an unsafe.Pointer and hence keeps the pointer
+// alive. It therefore could be a pessimization in some cases (such
+// as a long-lived function) if the caller does not call freegc before
+// or roughly when the liveness analysis of the compiler
+// would otherwise have determined ptr's object is reclaimable by the GC.
+func freegc(ptr unsafe.Pointer, size uintptr, noscan bool) bool {
+ if !runtimeFreegcEnabled || !reusableSize(size) {
+ return false
+ }
+ if sizeSpecializedMallocEnabled && !noscan {
+ // TODO(thepudds): temporarily disable freegc with SizeSpecializedMalloc for pointer types
+ // until we finish integrating.
+ return false
+ }
+
+ if ptr == nil {
+ throw("freegc nil")
+ }
+
+ // Set mp.mallocing to keep from being preempted by GC.
+ // Otherwise, the GC could flush our mcache or otherwise cause problems.
+ mp := acquirem()
+ if mp.mallocing != 0 {
+ throw("freegc deadlock")
+ }
+ if mp.gsignal == getg() {
+ throw("freegc during signal")
+ }
+ mp.mallocing = 1
+
+ if mp.curg.stack.lo <= uintptr(ptr) && uintptr(ptr) < mp.curg.stack.hi {
+ // This points into our stack, so free is a no-op.
+ mp.mallocing = 0
+ releasem(mp)
+ return false
+ }
+
+ if doubleCheckReusable {
+ // TODO(thepudds): we could enforce no free on globals in bss or data. Maybe by
+ // checking span via spanOf or spanOfHeap, or maybe walk from firstmoduledata
+ // like isGoPointerWithoutSpan, or activeModules, or something. If so, we might
+ // be able to delay checking until reuse (e.g., check span just before reusing,
+ // though currently we don't always need to lookup a span on reuse). If we think
+ // no usage patterns could result in globals, maybe enforcement for globals could
+ // be behind -d=checkptr=1 or similar. The compiler can have knowledge of where
+ // a variable is allocated, but stdlib does not, although there are certain
+ // usage patterns that cannot result in a global.
+ // TODO(thepudds): separately, consider a local debugReusableMcacheOnly here
+ // to ignore freed objects if not in mspan in mcache, maybe when freeing and reading,
+ // by checking something like s.base() <= uintptr(v) && uintptr(v) < s.limit. Or
+ // maybe a GODEBUG or compiler debug flag.
+ span := spanOf(uintptr(ptr))
+ if span == nil {
+ throw("nextReusable: nil span for pointer in free list")
+ }
+ if state := span.state.get(); state != mSpanInUse {
+ throw("nextReusable: span is not in use")
+ }
+ }
+
+ if debug.clobberfree != 0 {
+ clobberfree(ptr, size)
+ }
+
+ // We first check if p is still in our per-P cache.
+ // Get our per-P cache for small objects.
+ c := getMCache(mp)
+ if c == nil {
+ throw("freegc called without a P or outside bootstrapping")
+ }
+
+ v := uintptr(ptr)
+ if !noscan && !heapBitsInSpan(size) {
+ // mallocgcSmallScanHeader expects to get the base address of the object back
+ // from the findReusable funcs (as well as from nextFreeFast and nextFree), and
+	// not mallocHeaderSize bytes into an object, so adjust that here.
+ v -= mallocHeaderSize
+
+ // The size class lookup wants size to be adjusted by mallocHeaderSize.
+ size += mallocHeaderSize
+ }
+
+ // TODO(thepudds): should verify (behind doubleCheckReusable constant) that our calculated
+ // sizeclass here matches what's in span found via spanOf(ptr) or findObject(ptr).
+ var sizeclass uint8
+ if size <= gc.SmallSizeMax-8 {
+ sizeclass = gc.SizeToSizeClass8[divRoundUp(size, gc.SmallSizeDiv)]
+ } else {
+ sizeclass = gc.SizeToSizeClass128[divRoundUp(size-gc.SmallSizeMax, gc.LargeSizeDiv)]
+ }
+
+ spc := makeSpanClass(sizeclass, noscan)
+ s := c.alloc[spc]
+
+ if debugReusableLog {
+ if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+ println("freegc [in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ } else {
+ println("freegc [NOT in mcache]:", hex(uintptr(v)), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ }
+ }
+
+ if noscan {
+ c.addReusableNoscan(spc, uintptr(v))
+ } else {
+ // TODO(thepudds): implemented in later CL in our stack.
+ throw("freegc called for object with pointers, not yet implemented")
+ }
+
+ // For stats, for now we leave allocCount alone, roughly pretending to the rest
+ // of the system that this potential reuse never happened.
+
+ mp.mallocing = 0
+ releasem(mp)
+
+ return true
+}
+
+// nextReusableNoScan returns the next reusable object for a noscan span,
+// or 0 if no reusable object is found.
+func (c *mcache) nextReusableNoScan(s *mspan, spc spanClass) (gclinkptr, *mspan) {
+ if !runtimeFreegcEnabled {
+ return 0, s
+ }
+
+ // Pop a reusable pointer from the free list for this span class.
+ v := c.reusableNoscan[spc]
+ if v == 0 {
+ return 0, s
+ }
+ c.reusableNoscan[spc] = v.ptr().next
+
+ if debugReusableLog {
+ println("reusing from ptr free list:", hex(v), "sweepgen:", mheap_.sweepgen, "writeBarrier.enabled:", writeBarrier.enabled)
+ }
+ if doubleCheckReusable {
+ doubleCheckNextReusable(v) // debug only sanity check
+ }
+
+ // For noscan spans, we only need the span if the write barrier is enabled (so that our caller
+ // can call gcmarknewobject to allocate black). If the write barrier is enabled, we can skip
+ // looking up the span when the pointer is in a span in the mcache.
+ if !writeBarrier.enabled {
+ return v, nil
+ }
+ if s.base() <= uintptr(v) && uintptr(v) < s.limit {
+ // Return the original span.
+ return v, s
+ }
+
+ // We must find and return the span.
+ span := spanOf(uintptr(v))
+ if span == nil {
+ // TODO(thepudds): construct a test that triggers this throw.
+ throw("nextReusableNoScan: nil span for pointer in reusable object free list")
+ }
+
+ return v, span
+}
+
+// doubleCheckNextReusable checks some invariants.
+// TODO(thepudds): will probably delete some of this. Can mostly be ignored for review.
+func doubleCheckNextReusable(v gclinkptr) {
+ // TODO(thepudds): should probably take the spanClass as well to confirm expected
+ // sizeclass match.
+ _, span, objIndex := findObject(uintptr(v), 0, 0)
+ if span == nil {
+ throw("nextReusable: nil span for pointer in free list")
+ }
+ if state := span.state.get(); state != mSpanInUse {
+ throw("nextReusable: span is not in use")
+ }
+ if uintptr(v) < span.base() || uintptr(v) >= span.limit {
+ throw("nextReusable: span is not in range")
+ }
+ if span.objBase(uintptr(v)) != uintptr(v) {
+ print("nextReusable: v=", hex(v), " base=", hex(span.objBase(uintptr(v))), "\n")
+ throw("nextReusable: v is non-base-address for object found on pointer free list")
+ }
+ if span.isFree(objIndex) {
+ throw("nextReusable: pointer on free list is free")
+ }
+
+ const debugReusableEnsureSwept = false
+ if debugReusableEnsureSwept {
+ // Currently disabled.
+ // Note: ensureSwept here alters behavior (not just an invariant check).
+ span.ensureSwept()
+ if span.isFree(objIndex) {
+ throw("nextReusable: pointer on free list is free after ensureSwept")
+ }
+ }
+}
+
+// reusableSize reports whether size is a currently supported size for a reusable object.
+func reusableSize(size uintptr) bool {
+ if size < maxTinySize || size > maxSmallSize-mallocHeaderSize {
+ return false
+ }
+ return true
+}
+
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also
diff --git a/src/runtime/malloc_generated.go b/src/runtime/malloc_generated.go
index 2215dbaddb..5abb61257a 100644
--- a/src/runtime/malloc_generated.go
+++ b/src/runtime/malloc_generated.go
@@ -1,4 +1,5 @@
// Code generated by mkmalloc.go; DO NOT EDIT.
+// See overview in malloc_stubs.go.
package runtime
@@ -6400,6 +6401,32 @@ func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6497,6 +6524,32 @@ func mallocgcSmallNoScanSC3(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6594,6 +6647,32 @@ func mallocgcSmallNoScanSC4(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6691,6 +6770,32 @@ func mallocgcSmallNoScanSC5(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6788,6 +6893,32 @@ func mallocgcSmallNoScanSC6(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6885,6 +7016,32 @@ func mallocgcSmallNoScanSC7(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -6982,6 +7139,32 @@ func mallocgcSmallNoScanSC8(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7079,6 +7262,32 @@ func mallocgcSmallNoScanSC9(size uintptr, typ *_type, needzero bool) unsafe.Poin
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7176,6 +7385,32 @@ func mallocgcSmallNoScanSC10(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7273,6 +7508,32 @@ func mallocgcSmallNoScanSC11(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7370,6 +7631,32 @@ func mallocgcSmallNoScanSC12(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7467,6 +7754,32 @@ func mallocgcSmallNoScanSC13(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7564,6 +7877,32 @@ func mallocgcSmallNoScanSC14(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7661,6 +8000,32 @@ func mallocgcSmallNoScanSC15(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7758,6 +8123,32 @@ func mallocgcSmallNoScanSC16(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7855,6 +8246,32 @@ func mallocgcSmallNoScanSC17(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -7952,6 +8369,32 @@ func mallocgcSmallNoScanSC18(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8049,6 +8492,32 @@ func mallocgcSmallNoScanSC19(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8146,6 +8615,32 @@ func mallocgcSmallNoScanSC20(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8243,6 +8738,32 @@ func mallocgcSmallNoScanSC21(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8340,6 +8861,32 @@ func mallocgcSmallNoScanSC22(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8437,6 +8984,32 @@ func mallocgcSmallNoScanSC23(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8534,6 +9107,32 @@ func mallocgcSmallNoScanSC24(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8631,6 +9230,32 @@ func mallocgcSmallNoScanSC25(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
@@ -8728,6 +9353,32 @@ func mallocgcSmallNoScanSC26(size uintptr, typ *_type, needzero bool) unsafe.Poi
const spc = spanClass(sizeclass<<1) | spanClass(1)
span := c.alloc[spc]
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+ x := v
+ {
+
+ if valgrindenabled {
+ valgrindMalloc(x, size)
+ }
+
+ if gcBlackenEnabled != 0 && elemsize != 0 {
+ if assistG := getg().m.curg; assistG != nil {
+ assistG.gcAssistBytes -= int64(elemsize - size)
+ }
+ }
+
+ if debug.malloc {
+ postMallocgcDebug(x, elemsize, typ)
+ }
+ return x
+ }
+
+ }
+
var nextFreeFastResult gclinkptr
if span.allocCache != 0 {
theBit := sys.TrailingZeros64(span.allocCache)
diff --git a/src/runtime/malloc_stubs.go b/src/runtime/malloc_stubs.go
index 224746f3d4..e9752956b8 100644
--- a/src/runtime/malloc_stubs.go
+++ b/src/runtime/malloc_stubs.go
@@ -7,6 +7,8 @@
// to produce a full mallocgc function that's specialized for a span class
// or specific size in the case of the tiny allocator.
//
+// To generate the specialized mallocgc functions, do 'go run .' inside runtime/_mkmalloc.
+//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub or tinyStub, depending on the parameters being specialized.
@@ -71,7 +73,8 @@ func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
- // Assist the GC if needed.
+ // Assist the GC if needed. (On the reuse path, we currently compensate for this;
+ // changes here might require changes there.)
if gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
@@ -242,6 +245,23 @@ func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, u
c := getMCache(mp)
const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
span := c.alloc[spc]
+
+ // First, check for a reusable object.
+ if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
+ // We have a reusable object, use it.
+ v := mallocgcSmallNoscanReuse(c, span, spc, elemsize, needzero)
+ mp.mallocing = 0
+ releasem(mp)
+
+ // TODO(thepudds): note that the generated return path is essentially duplicated
+ // by the generator. For example, see the two postMallocgcDebug calls and
+ // related duplicated code on the return path currently in the generated
+	// mallocgcSmallNoScanSC2 function. One set of those corresponds to this
+ // return here. We might be able to de-duplicate the generated return path
+ // by updating the generator, perhaps by jumping to a shared return or similar.
+ return v, elemsize
+ }
+
v := nextFreeFastStub(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index bf58947bbc..97cf0eed54 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -16,6 +16,7 @@ import (
"runtime"
. "runtime"
"strings"
+ "sync"
"sync/atomic"
"testing"
"time"
@@ -234,6 +235,364 @@ func TestTinyAllocIssue37262(t *testing.T) {
runtime.Releasem()
}
+// TestFreegc does basic testing of explicit frees.
+func TestFreegc(t *testing.T) {
+ tests := []struct {
+ size string
+ f func(noscan bool) func(*testing.T)
+ noscan bool
+ }{
+ // Types without pointers.
+ {"size=16", testFreegc[[16]byte], true}, // smallest we support currently
+ {"size=17", testFreegc[[17]byte], true},
+ {"size=64", testFreegc[[64]byte], true},
+ {"size=500", testFreegc[[500]byte], true},
+ {"size=512", testFreegc[[512]byte], true},
+ {"size=4096", testFreegc[[4096]byte], true},
+ {"size=20000", testFreegc[[20000]byte], true}, // not power of 2 or spc boundary
+ {"size=32KiB-8", testFreegc[[1<<15 - 8]byte], true}, // max noscan small object for 64-bit
+ }
+
+ // Run the tests twice if not in -short mode or not otherwise saving test time.
+ // First while manually calling runtime.GC to slightly increase isolation (perhaps making
+ // problems more reproducible).
+ for _, tt := range tests {
+ runtime.GC()
+ t.Run(fmt.Sprintf("gc=yes/ptrs=%v/%s", !tt.noscan, tt.size), tt.f(tt.noscan))
+ }
+ runtime.GC()
+
+ if testing.Short() || !RuntimeFreegcEnabled || runtime.Raceenabled {
+ return
+ }
+
+ // Again, but without manually calling runtime.GC in the loop (perhaps less isolation might
+ // trigger problems).
+ for _, tt := range tests {
+ t.Run(fmt.Sprintf("gc=no/ptrs=%v/%s", !tt.noscan, tt.size), tt.f(tt.noscan))
+ }
+ runtime.GC()
+}
+
+func testFreegc[T comparable](noscan bool) func(*testing.T) {
+ // We use stressMultiple to influence the duration of the tests.
+ // When testing freegc changes, stressMultiple can be increased locally
+ // to test longer or in some cases with more goroutines.
+ // It can also be helpful to test with GODEBUG=clobberfree=1 and
+ // with and without doubleCheckMalloc and doubleCheckReusable enabled.
+ stressMultiple := 10
+ if testing.Short() || !RuntimeFreegcEnabled || runtime.Raceenabled {
+ stressMultiple = 1
+ }
+
+ return func(t *testing.T) {
+ alloc := func() *T {
+ // Force heap alloc, plus some light validation of zeroed memory.
+ t.Helper()
+ p := Escape(new(T))
+ var zero T
+ if *p != zero {
+ t.Fatalf("allocator returned non-zero memory: %v", *p)
+ }
+ return p
+ }
+
+ free := func(p *T) {
+ t.Helper()
+ var zero T
+ if *p != zero {
+ t.Fatalf("found non-zero memory before freegc (tests do not modify memory): %v", *p)
+ }
+ runtime.Freegc(unsafe.Pointer(p), unsafe.Sizeof(*p), noscan)
+ }
+
+ t.Run("basic-free", func(t *testing.T) {
+ // Test that freeing a live heap object doesn't crash.
+ for range 100 {
+ p := alloc()
+ free(p)
+ }
+ })
+
+ t.Run("stack-free", func(t *testing.T) {
+ // Test that freeing a stack object doesn't crash.
+ for range 100 {
+ var x [32]byte
+ var y [32]*int
+ runtime.Freegc(unsafe.Pointer(&x), unsafe.Sizeof(x), true) // noscan
+ runtime.Freegc(unsafe.Pointer(&y), unsafe.Sizeof(y), false) // !noscan
+ }
+ })
+
+ // Check our allocations. These tests rely on the
+ // current implementation treating a re-used object
+ // as not adding to the allocation counts seen
+ // by testing.AllocsPerRun. (This is not the desired
+ // long-term behavior, but it is the current behavior and
+ // makes these tests convenient).
+
+ t.Run("allocs-baseline", func(t *testing.T) {
+ // Baseline result without any explicit free.
+ allocs := testing.AllocsPerRun(100, func() {
+ for range 100 {
+ p := alloc()
+ _ = p
+ }
+ })
+ if allocs < 100 {
+ // TODO(thepudds): we get exactly 100 for almost all the tests, but investigate why
+ // ~101 allocs for TestFreegc/ptrs=true/size=32KiB-8.
+ t.Fatalf("expected >=100 allocations, got %v", allocs)
+ }
+ })
+
+ t.Run("allocs-with-free", func(t *testing.T) {
+ // Same allocations, but now using explicit free so that
+ // no allocs get reported. (Again, not the desired long-term behavior).
+ if SizeSpecializedMallocEnabled && !noscan {
+ // TODO(thepudds): skip at this point in the stack for size-specialized malloc
+ // with !noscan. Additional integration with sizespecializedmalloc is in a later CL.
+ t.Skip("temporarily skipping alloc tests for GOEXPERIMENT=sizespecializedmalloc for pointer types")
+ }
+ if !RuntimeFreegcEnabled {
+ t.Skip("skipping alloc tests with runtime.freegc disabled")
+ }
+ allocs := testing.AllocsPerRun(100, func() {
+ for range 100 {
+ p := alloc()
+ free(p)
+ }
+ })
+ if allocs != 0 {
+ t.Fatalf("expected 0 allocations, got %v", allocs)
+ }
+ })
+
+ t.Run("free-multiple", func(t *testing.T) {
+ // Multiple allocations outstanding before explicitly freeing,
+ // but still within the limit of our smallest free list size
+ // so that no allocs are reported. (Again, not long-term behavior).
+ if SizeSpecializedMallocEnabled && !noscan {
+ // TODO(thepudds): skip at this point in the stack for size-specialized malloc
+ // with !noscan. Additional integration with sizespecializedmalloc is in a later CL.
+ t.Skip("temporarily skipping alloc tests for GOEXPERIMENT=sizespecializedmalloc for pointer types")
+ }
+ if !RuntimeFreegcEnabled {
+ t.Skip("skipping alloc tests with runtime.freegc disabled")
+ }
+ const maxOutstanding = 20
+ s := make([]*T, 0, maxOutstanding)
+ allocs := testing.AllocsPerRun(100*stressMultiple, func() {
+ s = s[:0]
+ for range maxOutstanding {
+ p := alloc()
+ s = append(s, p)
+ }
+ for _, p := range s {
+ free(p)
+ }
+ })
+ if allocs != 0 {
+ t.Fatalf("expected 0 allocations, got %v", allocs)
+ }
+ })
+
+ if runtime.GOARCH == "wasm" {
+ // TODO(thepudds): for wasm, double-check if just slow, vs. some test logic problem,
+ // vs. something else. It might have been wasm was slowest with tests that spawn
+ // many goroutines, which might be expected for wasm. This skip might no longer be
+ // needed now that we have tuned test execution time more, or perhaps wasm should just
+ // always run in short mode, which might also let us remove this skip.
+ t.Skip("skipping remaining freegc tests, was timing out on wasm")
+ }
+
+ t.Run("free-many", func(t *testing.T) {
+ // Confirm we are graceful if we have more freed elements at once
+ // than the max free list size.
+ s := make([]*T, 0, 1000)
+ iterations := stressMultiple * stressMultiple // currently 1 (-short) or 100
+ for range iterations {
+ s = s[:0]
+ for range 1000 {
+ p := alloc()
+ s = append(s, p)
+ }
+ for _, p := range s {
+ free(p)
+ }
+ }
+ })
+
+ t.Run("duplicate-check", func(t *testing.T) {
+ // A simple duplicate allocation test. We track what should be the set
+ // of live pointers in a map across a series of allocs and frees,
+ // and fail if a live pointer value is returned by an allocation.
+ // TODO: maybe add randomness? allow more live pointers? do across goroutines?
+ live := make(map[uintptr]bool)
+ for i := range 100 * stressMultiple {
+ var s []*T
+ // Alloc 10 times, tracking the live pointer values.
+ for j := range 10 {
+ p := alloc()
+ uptr := uintptr(unsafe.Pointer(p))
+ if live[uptr] {
+ t.Fatalf("found duplicate pointer (0x%x). i: %d j: %d", uptr, i, j)
+ }
+ live[uptr] = true
+ s = append(s, p)
+ }
+ // Explicitly free those pointers, removing them from the live map.
+ for k := range s {
+ p := s[k]
+ s[k] = nil
+ uptr := uintptr(unsafe.Pointer(p))
+ free(p)
+ delete(live, uptr)
+ }
+ }
+ })
+
+ t.Run("free-other-goroutine", func(t *testing.T) {
+ // Use explicit free, but the free happens on a different goroutine than the alloc.
+ // This also lightly simulates how the free code sees P migration or flushing
+ // the mcache, assuming we have > 1 P. (Not using testing.AllocsPerRun here).
+ iterations := 10 * stressMultiple * stressMultiple // currently 10 (-short) or 1000
+ for _, capacity := range []int{2} {
+ for range iterations {
+ ch := make(chan *T, capacity)
+ var wg sync.WaitGroup
+ for range 2 {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for p := range ch {
+ free(p)
+ }
+ }()
+ }
+ for range 100 {
+ p := alloc()
+ ch <- p
+ }
+ close(ch)
+ wg.Wait()
+ }
+ }
+ })
+
+ t.Run("many-goroutines", func(t *testing.T) {
+ // Allocate across multiple goroutines, freeing on the same goroutine.
+ // TODO: probably remove the duplicate checking here; not that useful.
+ counts := []int{1, 2, 4, 8, 10 * stressMultiple}
+ for _, goroutines := range counts {
+ var wg sync.WaitGroup
+ for range goroutines {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ live := make(map[uintptr]bool)
+ for range 100 * stressMultiple {
+ p := alloc()
+ uptr := uintptr(unsafe.Pointer(p))
+ if live[uptr] {
+ panic("TestFreeLive: found duplicate pointer")
+ }
+ live[uptr] = true
+ free(p)
+ delete(live, uptr)
+ }
+ }()
+ }
+ wg.Wait()
+ }
+ })
+
+ t.Run("assist-credit", func(t *testing.T) {
+ // Allocate and free using the same span class repeatedly while
+ // verifying it results in a net zero change in assist credit.
+ // This helps double-check our manipulation of the assist credit
+ // during mallocgc/freegc, including in cases when there is
+ // internal fragmentation when the requested mallocgc size is
+ // smaller than the size class.
+ //
+ // See https://go.dev/cl/717520 for some additional discussion,
+ // including how we can deliberately cause the test to fail currently
+ // if we purposefully introduce some assist credit bugs.
+ if SizeSpecializedMallocEnabled && !noscan {
+ // TODO(thepudds): skip this test at this point in the stack; later CL has
+ // integration with sizespecializedmalloc.
+ t.Skip("temporarily skip assist credit tests for GOEXPERIMENT=sizespecializedmalloc for pointer types")
+ }
+ if !RuntimeFreegcEnabled {
+ t.Skip("skipping assist credit test with runtime.freegc disabled")
+ }
+
+ // Use a background goroutine to continuously run the GC.
+ done := make(chan struct{})
+ defer close(done)
+ go func() {
+ for {
+ select {
+ case <-done:
+ return
+ default:
+ runtime.GC()
+ }
+ }
+ }()
+
+ // If making changes related to this test, consider testing locally with
+ // larger counts, like 100K or 1M.
+ counts := []int{1, 2, 10, 100 * stressMultiple}
+ // Dropping down to GOMAXPROCS=1 might help reduce noise.
+ defer GOMAXPROCS(GOMAXPROCS(1))
+ size := int64(unsafe.Sizeof(*new(T)))
+ for _, count := range counts {
+ // Start by forcing a GC to reset this g's assist credit
+ // and perhaps help us get a cleaner measurement of GC cycle count.
+ runtime.GC()
+ for i := range count {
+ // We disable preemption to reduce other code's ability to adjust this g's
+ // assist credit or otherwise change things while we are measuring.
+ Acquirem()
+
+ // We do two allocations per loop, with the second allocation being
+ // the one we measure. The first allocation tries to ensure at least one
+ // reusable object on the mspan's free list when we do our measured allocation.
+ p := alloc()
+ free(p)
+
+ // Now do our primary allocation of interest, bracketed by measurements.
+ // We measure more than we strictly need (to log details in case of a failure).
+ creditStart := AssistCredit()
+ blackenStart := GcBlackenEnable()
+ p = alloc()
+ blackenAfterAlloc := GcBlackenEnable()
+ creditAfterAlloc := AssistCredit()
+ free(p)
+ blackenEnd := GcBlackenEnable()
+ creditEnd := AssistCredit()
+
+ Releasem()
+ GoschedIfBusy()
+
+ delta := creditEnd - creditStart
+ if delta != 0 {
+ t.Logf("assist credit non-zero delta: %d", delta)
+ t.Logf("\t| size: %d i: %d count: %d", size, i, count)
+ t.Logf("\t| credit before: %d credit after: %d", creditStart, creditEnd)
+ t.Logf("\t| alloc delta: %d free delta: %d",
+ creditAfterAlloc-creditStart, creditEnd-creditAfterAlloc)
+ t.Logf("\t| gcBlackenEnable (start / after alloc / end): %v/%v/%v",
+ blackenStart, blackenAfterAlloc, blackenEnd)
+ t.FailNow()
+ }
+ }
+ }
+ })
+ }
+}
+
func TestPageCacheLeak(t *testing.T) {
defer GOMAXPROCS(GOMAXPROCS(1))
leaked := PageCachePagesLeaked()
@@ -337,6 +696,13 @@ func BenchmarkMalloc16(b *testing.B) {
}
}
+func BenchmarkMalloc32(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ p := new([4]int64)
+ Escape(p)
+ }
+}
+
func BenchmarkMallocTypeInfo8(b *testing.B) {
for i := 0; i < b.N; i++ {
p := new(struct {
@@ -355,6 +721,15 @@ func BenchmarkMallocTypeInfo16(b *testing.B) {
}
}
+func BenchmarkMallocTypeInfo32(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ p := new(struct {
+ p [32 / unsafe.Sizeof(uintptr(0))]*int
+ })
+ Escape(p)
+ }
+}
+
type LargeStruct struct {
x [16][]byte
}
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index cade81031d..82872f1454 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -44,7 +44,17 @@ type mcache struct {
// The rest is not accessed on every malloc.
- alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass
+ // alloc contains spans to allocate from, indexed by spanClass.
+ alloc [numSpanClasses]*mspan
+
+ // TODO(thepudds): better to interleave alloc and reusableScan/reusableNoscan so that
+ // a single malloc call can often access both in the same cache line for a given spanClass.
+	// It's not interleaved right now in part to have a slightly smaller diff, and it might
+	// have a negligible effect on current microbenchmarks.
+
+ // reusableNoscan contains linked lists of reusable noscan heap objects, indexed by spanClass.
+ // The next pointers are stored in the first word of the heap objects.
+ reusableNoscan [numSpanClasses]gclinkptr
stackcache [_NumStackOrders]stackfreelist
@@ -96,6 +106,7 @@ func allocmcache() *mcache {
c.alloc[i] = &emptymspan
}
c.nextSample = nextSample()
+
return c
}
@@ -153,6 +164,16 @@ func (c *mcache) refill(spc spanClass) {
if s.allocCount != s.nelems {
throw("refill of span with free space remaining")
}
+
+ // TODO(thepudds): we might be able to allow mallocgcTiny to reuse 16 byte objects from spc==5,
+ // but for now, just clear our reusable objects for tinySpanClass.
+ if spc == tinySpanClass {
+ c.reusableNoscan[spc] = 0
+ }
+ if c.reusableNoscan[spc] != 0 {
+ throw("refill of span with reusable pointers remaining on pointer free list")
+ }
+
if s != &emptymspan {
// Mark this span as no longer cached.
if s.sweepgen != mheap_.sweepgen+3 {
@@ -312,6 +333,13 @@ func (c *mcache) releaseAll() {
c.tinyAllocs = 0
memstats.heapStats.release()
+ // Clear the reusable linked lists.
+ // For noscan objects, the nodes of the linked lists are the reusable heap objects themselves,
+ // so we can simply clear the linked list head pointers.
+ // TODO(thepudds): consider having debug logging of a non-empty reusable lists getting cleared,
+ // maybe based on the existing debugReusableLog.
+ clear(c.reusableNoscan[:])
+
// Update heapLive and heapScan.
gcController.update(dHeapLive, scanAlloc)
}
@@ -339,3 +367,25 @@ func (c *mcache) prepareForSweep() {
stackcache_clear(c)
c.flushGen.Store(mheap_.sweepgen) // Synchronizes with gcStart
}
+
+// addReusableNoscan adds a noscan object pointer to the reusable pointer free list
+// for a span class.
+func (c *mcache) addReusableNoscan(spc spanClass, ptr uintptr) {
+ if !runtimeFreegcEnabled {
+ return
+ }
+
+ // Add to the reusable pointers free list.
+ v := gclinkptr(ptr)
+ v.ptr().next = c.reusableNoscan[spc]
+ c.reusableNoscan[spc] = v
+}
+
+// hasReusableNoscan reports whether there is a reusable object available for
+// a noscan spc.
+func (c *mcache) hasReusableNoscan(spc spanClass) bool {
+ if !runtimeFreegcEnabled {
+ return false
+ }
+ return c.reusableNoscan[spc] != 0
+}
diff --git a/src/runtime/mcleanup.go b/src/runtime/mcleanup.go
index 383217aa05..fc71af9f3f 100644
--- a/src/runtime/mcleanup.go
+++ b/src/runtime/mcleanup.go
@@ -72,8 +72,9 @@ import (
// pass the object to the [KeepAlive] function after the last point
// where the object must remain reachable.
func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
- // Explicitly force ptr to escape to the heap.
+ // Explicitly force ptr and cleanup to escape to the heap.
ptr = abi.Escape(ptr)
+ cleanup = abi.Escape(cleanup)
// The pointer to the object must be valid.
if ptr == nil {
@@ -82,7 +83,8 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
usptr := uintptr(unsafe.Pointer(ptr))
// Check that arg is not equal to ptr.
- if kind := abi.TypeOf(arg).Kind(); kind == abi.Pointer || kind == abi.UnsafePointer {
+ argType := abi.TypeOf(arg)
+ if kind := argType.Kind(); kind == abi.Pointer || kind == abi.UnsafePointer {
if unsafe.Pointer(ptr) == *((*unsafe.Pointer)(unsafe.Pointer(&arg))) {
panic("runtime.AddCleanup: ptr is equal to arg, cleanup will never run")
}
@@ -98,12 +100,23 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
return Cleanup{}
}
- fn := func() {
- cleanup(arg)
+ // Create new storage for the argument.
+ var argv *S
+ if size := unsafe.Sizeof(arg); size < maxTinySize && argType.PtrBytes == 0 {
+ // Side-step the tiny allocator to avoid liveness issues, since this box
+ // will be treated like a root by the GC. We model the box as an array of
+ // uintptrs to guarantee maximum allocator alignment.
+ //
+ // TODO(mknyszek): Consider just making space in cleanupFn for this. The
+ // unfortunate part of this is it would grow specialCleanup by 16 bytes, so
+ // while there wouldn't be an allocation, *every* cleanup would take the
+ // memory overhead hit.
+ box := new([maxTinySize / goarch.PtrSize]uintptr)
+ argv = (*S)(unsafe.Pointer(box))
+ } else {
+ argv = new(S)
}
- // Closure must escape.
- fv := *(**funcval)(unsafe.Pointer(&fn))
- fv = abi.Escape(fv)
+ *argv = arg
// Find the containing object.
base, _, _ := findObject(usptr, 0, 0)
@@ -120,7 +133,16 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
gcCleanups.createGs()
}
- id := addCleanup(unsafe.Pointer(ptr), fv)
+ id := addCleanup(unsafe.Pointer(ptr), cleanupFn{
+ // Instantiate a caller function to call the cleanup, that is cleanup(*argv).
+ //
+ // TODO(mknyszek): This allocates because the generic dictionary argument
+ // gets closed over, but callCleanup doesn't even use the dictionary argument,
+ // so theoretically that could be removed, eliminating an allocation.
+ call: callCleanup[S],
+ fn: *(**funcval)(unsafe.Pointer(&cleanup)),
+ arg: unsafe.Pointer(argv),
+ })
if debug.checkfinalizers != 0 {
cleanupFn := *(**funcval)(unsafe.Pointer(&cleanup))
setCleanupContext(unsafe.Pointer(ptr), abi.TypeFor[T](), sys.GetCallerPC(), cleanupFn.fn, id)
@@ -131,6 +153,16 @@ func AddCleanup[T, S any](ptr *T, cleanup func(S), arg S) Cleanup {
}
}
+// callCleanup is a helper for calling cleanups in a polymorphic way.
+//
+// In practice, all it does is call fn(*arg). arg must be a *T.
+//
+//go:noinline
+func callCleanup[T any](fn *funcval, arg unsafe.Pointer) {
+ cleanup := *(*func(T))(unsafe.Pointer(&fn))
+ cleanup(*(*T)(arg))
+}
+
// Cleanup is a handle to a cleanup call for a specific object.
type Cleanup struct {
// id is the unique identifier for the cleanup within the arena.
@@ -216,7 +248,17 @@ const cleanupBlockSize = 512
// that the cleanup queue does not grow during marking (but it can shrink).
type cleanupBlock struct {
cleanupBlockHeader
- cleanups [(cleanupBlockSize - unsafe.Sizeof(cleanupBlockHeader{})) / goarch.PtrSize]*funcval
+ cleanups [(cleanupBlockSize - unsafe.Sizeof(cleanupBlockHeader{})) / unsafe.Sizeof(cleanupFn{})]cleanupFn
+}
+
+var cleanupFnPtrMask = [...]uint8{0b111}
+
+// cleanupFn represents a cleanup function with its argument, yet to be called.
+type cleanupFn struct {
+ // call is an adapter function that understands how to safely call fn(*arg).
+ call func(*funcval, unsafe.Pointer)
+ fn *funcval // cleanup function passed to AddCleanup.
+ arg unsafe.Pointer // pointer to argument to pass to cleanup function.
}
var cleanupBlockPtrMask [cleanupBlockSize / goarch.PtrSize / 8]byte
@@ -245,8 +287,8 @@ type cleanupBlockHeader struct {
//
// Must only be called if the GC is in the sweep phase (gcphase == _GCoff),
// because it does not synchronize with the garbage collector.
-func (b *cleanupBlock) enqueue(fn *funcval) bool {
- b.cleanups[b.n] = fn
+func (b *cleanupBlock) enqueue(c cleanupFn) bool {
+ b.cleanups[b.n] = c
b.n++
return b.full()
}
@@ -375,7 +417,7 @@ func (q *cleanupQueue) tryTakeWork() bool {
// enqueue queues a single cleanup for execution.
//
// Called by the sweeper, and only the sweeper.
-func (q *cleanupQueue) enqueue(fn *funcval) {
+func (q *cleanupQueue) enqueue(c cleanupFn) {
mp := acquirem()
pp := mp.p.ptr()
b := pp.cleanups
@@ -396,7 +438,7 @@ func (q *cleanupQueue) enqueue(fn *funcval) {
}
pp.cleanups = b
}
- if full := b.enqueue(fn); full {
+ if full := b.enqueue(c); full {
q.full.push(&b.lfnode)
pp.cleanups = nil
q.addWork(1)
@@ -641,7 +683,8 @@ func runCleanups() {
gcCleanups.beginRunningCleanups()
for i := 0; i < int(b.n); i++ {
- fn := b.cleanups[i]
+ c := b.cleanups[i]
+ b.cleanups[i] = cleanupFn{}
var racectx uintptr
if raceenabled {
@@ -650,20 +693,15 @@ func runCleanups() {
// the same goroutine.
//
// Synchronize on fn. This would fail to find races on the
- // closed-over values in fn (suppose fn is passed to multiple
- // AddCleanup calls) if fn was not unique, but it is. Update
- // the synchronization on fn if you intend to optimize it
- // and store the cleanup function and cleanup argument on the
- // queue directly.
- racerelease(unsafe.Pointer(fn))
+ // closed-over values in fn (suppose arg is passed to multiple
+ // AddCleanup calls) if arg was not unique, but it is.
+ racerelease(unsafe.Pointer(c.arg))
racectx = raceEnterNewCtx()
- raceacquire(unsafe.Pointer(fn))
+ raceacquire(unsafe.Pointer(c.arg))
}
// Execute the next cleanup.
- cleanup := *(*func())(unsafe.Pointer(&fn))
- cleanup()
- b.cleanups[i] = nil
+ c.call(c.fn, c.arg)
if raceenabled {
// Restore the old context.
diff --git a/src/runtime/mcleanup_test.go b/src/runtime/mcleanup_test.go
index 22b9eccd20..341d30afa7 100644
--- a/src/runtime/mcleanup_test.go
+++ b/src/runtime/mcleanup_test.go
@@ -336,3 +336,31 @@ func TestCleanupLost(t *testing.T) {
t.Errorf("expected %d cleanups to be executed, got %d", got, want)
}
}
+
+// BenchmarkAddCleanupAndStop benchmarks adding and removing a cleanup
+// from the same allocation.
+//
+// At face value, this benchmark is unrealistic, since no program would
+// do this in practice. However, adding cleanups to new allocations in a
+// loop is also unrealistic. It adds additional unused allocations,
+// exercises uncommon performance pitfalls in AddCleanup (traversing the
+// specials list, which should just be its own benchmark), and executes
+// cleanups at a frequency that is unlikely to appear in real programs.
+//
+// This benchmark is still useful however, since we can get a low-noise
+// measurement of the cost of AddCleanup and Stop all in one without the
+// above pitfalls: we can measure the pure overhead. We can then separate
+// out the cost of each in CPU profiles if we so choose (they're not so
+// inexpensive as to make this infeasible).
+func BenchmarkAddCleanupAndStop(b *testing.B) {
+ b.ReportAllocs()
+
+ type T struct {
+ v int
+ p unsafe.Pointer
+ }
+ x := new(T)
+ for b.Loop() {
+ runtime.AddCleanup(x, func(int) {}, 14).Stop()
+ }
+}
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 43afbc330b..febcd9558c 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -1727,7 +1727,13 @@ func gcBgMarkWorker(ready chan struct{}) {
// the stack (see gopark). Prevent deadlock from recursively
// starting GC by disabling preemption.
gp.m.preemptoff = "GC worker init"
- node := &new(gcBgMarkWorkerNodePadded).gcBgMarkWorkerNode // TODO: technically not allowed in the heap. See comment in tagptr.go.
+ // TODO: This is technically not allowed in the heap. See comment in tagptr.go.
+ //
+ // It is kept alive simply by virtue of being used in the infinite loop
+ // below. gcBgMarkWorkerPool keeps pointers to nodes that are not
+ // GC-visible, so this must be kept alive indefinitely (even if
+ // GOMAXPROCS decreases).
+ node := &new(gcBgMarkWorkerNodePadded).gcBgMarkWorkerNode
gp.m.preemptoff = ""
node.gp.set(gp)
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index dd76973c62..714b9a51df 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -204,7 +204,7 @@ func gcMarkRootCheck() {
})
}
-// ptrmask for an allocation containing a single pointer.
+// oneptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}
// markroot scans the i'th root.
@@ -251,7 +251,7 @@ func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 {
// N.B. This only needs to synchronize with cleanup execution, which only resets these blocks.
// All cleanup queueing happens during sweep.
n := uintptr(atomic.Load(&cb.n))
- scanblock(uintptr(unsafe.Pointer(&cb.cleanups[0])), n*goarch.PtrSize, &cleanupBlockPtrMask[0], gcw, nil)
+ scanblock(uintptr(unsafe.Pointer(&cb.cleanups[0])), n*unsafe.Sizeof(cleanupFn{}), &cleanupBlockPtrMask[0], gcw, nil)
}
case work.baseSpans <= i && i < work.baseStacks:
@@ -489,7 +489,7 @@ func gcScanFinalizer(spf *specialfinalizer, s *mspan, gcw *gcWork) {
// gcScanCleanup scans the relevant parts of a cleanup special as a root.
func gcScanCleanup(spc *specialCleanup, gcw *gcWork) {
// The special itself is a root.
- scanblock(uintptr(unsafe.Pointer(&spc.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
+ scanblock(uintptr(unsafe.Pointer(&spc.cleanup)), unsafe.Sizeof(cleanupFn{}), &cleanupFnPtrMask[0], gcw, nil)
}
// gcAssistAlloc performs GC work to make gp's assist debt positive.
@@ -1524,29 +1524,32 @@ func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackSca
if debugScanConservative {
printlock()
print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
- hexdumpWords(b, b+n, func(p uintptr) byte {
+ hexdumpWords(b, n, func(p uintptr, m hexdumpMarker) {
if ptrmask != nil {
word := (p - b) / goarch.PtrSize
bits := *addb(ptrmask, word/8)
if (bits>>(word%8))&1 == 0 {
- return '$'
+ return
}
}
val := *(*uintptr)(unsafe.Pointer(p))
if state != nil && state.stack.lo <= val && val < state.stack.hi {
- return '@'
+ m.start()
+ println("ptr to stack")
+ return
}
span := spanOfHeap(val)
if span == nil {
- return ' '
+ return
}
idx := span.objIndex(val)
if span.isFreeOrNewlyAllocated(idx) {
- return ' '
+ return
}
- return '*'
+ m.start()
+ println("ptr to heap")
})
printunlock()
}
diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go
index 3594b33cfd..fa560f9966 100644
--- a/src/runtime/mgcmark_greenteagc.go
+++ b/src/runtime/mgcmark_greenteagc.go
@@ -978,7 +978,9 @@ func spanSetScans(spanBase uintptr, nelems uint16, imb *spanInlineMarkBits, toSc
}
func scanObjectSmall(spanBase, b, objSize uintptr, gcw *gcWork) {
- ptrBits := heapBitsSmallForAddrInline(spanBase, b, objSize)
+ hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, objSize)
+ hbits := (*byte)(unsafe.Pointer(hbitsBase))
+ ptrBits := extractHeapBitsSmall(hbits, spanBase, b, objSize)
gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
nptrs := 0
n := sys.OnesCount64(uint64(ptrBits))
@@ -1017,12 +1019,14 @@ func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *g
break
}
n := sys.OnesCount64(uint64(bits))
+ hbitsBase, _ := spanHeapBitsRange(base, gc.PageSize, objSize)
+ hbits := (*byte)(unsafe.Pointer(hbitsBase))
for range n {
j := sys.TrailingZeros64(uint64(bits))
bits &^= 1 << j
b := base + uintptr(i*(goarch.PtrSize*8)+j)*objSize
- ptrBits := heapBitsSmallForAddrInline(base, b, objSize)
+ ptrBits := extractHeapBitsSmall(hbits, base, b, objSize)
gcw.heapScanWork += int64(sys.Len64(uint64(ptrBits)) * goarch.PtrSize)
n := sys.OnesCount64(uint64(ptrBits))
@@ -1056,10 +1060,7 @@ func scanObjectsSmall(base, objSize uintptr, elems uint16, gcw *gcWork, scans *g
}
}
-func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
- hbitsBase, _ := spanHeapBitsRange(spanBase, gc.PageSize, elemsize)
- hbits := (*byte)(unsafe.Pointer(hbitsBase))
-
+func extractHeapBitsSmall(hbits *byte, spanBase, addr, elemsize uintptr) uintptr {
// These objects are always small enough that their bitmaps
// fit in a single word, so just load the word or two we need.
//
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
index 32c1b941e5..388cce83cd 100644
--- a/src/runtime/mgcpacer.go
+++ b/src/runtime/mgcpacer.go
@@ -10,7 +10,7 @@ import (
"internal/runtime/atomic"
"internal/runtime/math"
"internal/strconv"
- _ "unsafe" // for go:linkname
+ _ "unsafe"
)
const (
@@ -749,30 +749,33 @@ func (c *gcControllerState) enlistWorker() {
}
}
-// findRunnableGCWorker returns a background mark worker for pp if it
-// should be run. This must only be called when gcBlackenEnabled != 0.
-func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
+// assignWaitingGCWorker assigns a background mark worker to pp if one should
+// be run.
+//
+// If a worker is selected, it is assigned to pp.nextMarkGCWorker and the P is
+// wired as a GC mark worker. The G is still in _Gwaiting. If no worker is
+// selected, ok returns false.
+//
+// If assignWaitingGCWorker returns true, this P must either:
+// - Mark the G as runnable and run it, clearing pp.nextMarkGCWorker.
+// - Or, call c.releaseNextGCMarkWorker.
+//
+// This must only be called when gcBlackenEnabled != 0.
+func (c *gcControllerState) assignWaitingGCWorker(pp *p, now int64) (bool, int64) {
if gcBlackenEnabled == 0 {
throw("gcControllerState.findRunnable: blackening not enabled")
}
- // Since we have the current time, check if the GC CPU limiter
- // hasn't had an update in a while. This check is necessary in
- // case the limiter is on but hasn't been checked in a while and
- // so may have left sufficient headroom to turn off again.
if now == 0 {
now = nanotime()
}
- if gcCPULimiter.needUpdate(now) {
- gcCPULimiter.update(now)
- }
if !gcShouldScheduleWorker(pp) {
// No good reason to schedule a worker. This can happen at
// the end of the mark phase when there are still
// assists tapering off. Don't bother running a worker
// now because it'll just return immediately.
- return nil, now
+ return false, now
}
if c.dedicatedMarkWorkersNeeded.Load() <= 0 && c.fractionalUtilizationGoal == 0 {
@@ -783,7 +786,7 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
// When a dedicated worker stops running, the gcBgMarkWorker loop notes
// the need for the worker before returning it to the pool. If we don't
// see the need now, we wouldn't have found it in the pool anyway.
- return nil, now
+ return false, now
}
// Grab a worker before we commit to running below.
@@ -800,7 +803,7 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
// it will always do so with queued global work. Thus, that P
// will be immediately eligible to re-run the worker G it was
// just using, ensuring work can complete.
- return nil, now
+ return false, now
}
decIfPositive := func(val *atomic.Int64) bool {
@@ -823,7 +826,7 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
} else if c.fractionalUtilizationGoal == 0 {
// No need for fractional workers.
gcBgMarkWorkerPool.push(&node.node)
- return nil, now
+ return false, now
} else {
// Is this P behind on the fractional utilization
// goal?
@@ -833,12 +836,51 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
if delta > 0 && float64(pp.gcFractionalMarkTime.Load())/float64(delta) > c.fractionalUtilizationGoal {
// Nope. No need to run a fractional worker.
gcBgMarkWorkerPool.push(&node.node)
- return nil, now
+ return false, now
}
// Run a fractional worker.
pp.gcMarkWorkerMode = gcMarkWorkerFractionalMode
}
+ pp.nextGCMarkWorker = node
+ return true, now
+}
+
+// findRunnableGCWorker returns a background mark worker for pp if it
+// should be run.
+//
+// If findRunnableGCWorker returns a G, this P is wired as a GC mark worker and
+// must run the G.
+//
+// This must only be called when gcBlackenEnabled != 0.
+//
+// This function is allowed to have write barriers because it is called from
+// the portion of findRunnable that always has a P.
+//
+//go:yeswritebarrierrec
+func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
+ // Since we have the current time, check if the GC CPU limiter
+ // hasn't had an update in a while. This check is necessary in
+ // case the limiter is on but hasn't been checked in a while and
+ // so may have left sufficient headroom to turn off again.
+ if now == 0 {
+ now = nanotime()
+ }
+ if gcCPULimiter.needUpdate(now) {
+ gcCPULimiter.update(now)
+ }
+
+ // If a worker wasn't already assigned by procresize, assign one now.
+ if pp.nextGCMarkWorker == nil {
+ ok, now := c.assignWaitingGCWorker(pp, now)
+ if !ok {
+ return nil, now
+ }
+ }
+
+ node := pp.nextGCMarkWorker
+ pp.nextGCMarkWorker = nil
+
// Run the background mark worker.
gp := node.gp.ptr()
trace := traceAcquire()
@@ -850,6 +892,23 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
return gp, now
}
+// Release an unused pp.nextGCMarkWorker, if any.
+//
+// This function is allowed to have write barriers because it is called from
+// the portion of schedule that always has a P.
+//
+//go:yeswritebarrierrec
+func (c *gcControllerState) releaseNextGCMarkWorker(pp *p) {
+ node := pp.nextGCMarkWorker
+ if node == nil {
+ return
+ }
+
+ c.markWorkerStop(pp.gcMarkWorkerMode, 0)
+ gcBgMarkWorkerPool.push(&node.node)
+ pp.nextGCMarkWorker = nil
+}
+
// resetLive sets up the controller state for the next mark phase after the end
// of the previous one. Must be called after endCycle and before commit, before
// the world is started.
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index c3d6afb90a..4eecb1cfd9 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -885,7 +885,7 @@ func (s *mspan) reportZombies() {
if length > 1024 {
length = 1024
}
- hexdumpWords(addr, addr+length, nil)
+ hexdumpWords(addr, length, nil)
}
mbits.advance()
abits.advance()
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 711c7790eb..d2ff063b00 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -435,7 +435,7 @@ type mspan struct {
// indicating a free object. freeindex is then adjusted so that subsequent scans begin
// just past the newly discovered free object.
//
- // If freeindex == nelems, this span has no free objects.
+ // If freeindex == nelems, this span has no free objects, though it might have reusable objects.
//
// allocBits is a bitmap of objects in this span.
// If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
@@ -2161,7 +2161,7 @@ func removefinalizer(p unsafe.Pointer) {
type specialCleanup struct {
_ sys.NotInHeap
special special
- fn *funcval
+ cleanup cleanupFn
// Globally unique ID for the cleanup, obtained from mheap_.cleanupID.
id uint64
}
@@ -2170,14 +2170,18 @@ type specialCleanup struct {
// cleanups are allowed on an object, and even the same pointer.
// A cleanup id is returned which can be used to uniquely identify
// the cleanup.
-func addCleanup(p unsafe.Pointer, f *funcval) uint64 {
+func addCleanup(p unsafe.Pointer, c cleanupFn) uint64 {
+ // TODO(mknyszek): Consider pooling specialCleanups on the P
+ // so we don't have to take the lock every time. Just locking
+ // is a considerable part of the cost of AddCleanup. This
+ // would also require reserving some cleanup IDs on the P.
lock(&mheap_.speciallock)
s := (*specialCleanup)(mheap_.specialCleanupAlloc.alloc())
mheap_.cleanupID++ // Increment first. ID 0 is reserved.
id := mheap_.cleanupID
unlock(&mheap_.speciallock)
s.special.kind = _KindSpecialCleanup
- s.fn = f
+ s.cleanup = c
s.id = id
mp := acquirem()
@@ -2187,17 +2191,16 @@ func addCleanup(p unsafe.Pointer, f *funcval) uint64 {
// situation where it's possible that markrootSpans
// has already run but mark termination hasn't yet.
if gcphase != _GCoff {
- gcw := &mp.p.ptr().gcw
// Mark the cleanup itself, since the
// special isn't part of the GC'd heap.
- scanblock(uintptr(unsafe.Pointer(&s.fn)), goarch.PtrSize, &oneptrmask[0], gcw, nil)
+ gcScanCleanup(s, &mp.p.ptr().gcw)
}
releasem(mp)
- // Keep f alive. There's a window in this function where it's
- // only reachable via the special while the special hasn't been
- // added to the specials list yet. This is similar to a bug
+ // Keep c and its referents alive. There's a window in this function
+ // where it's only reachable via the special while the special hasn't
+ // been added to the specials list yet. This is similar to a bug
// discovered for weak handles, see #70455.
- KeepAlive(f)
+ KeepAlive(c)
return id
}
@@ -2534,7 +2537,15 @@ func getOrAddWeakHandle(p unsafe.Pointer) *atomic.Uintptr {
s := (*specialWeakHandle)(mheap_.specialWeakHandleAlloc.alloc())
unlock(&mheap_.speciallock)
- handle := new(atomic.Uintptr)
+ // N.B. Pad the weak handle to ensure it doesn't share a tiny
+ // block with any other allocations. This can lead to leaks, such
+ // as in go.dev/issue/76007. As an alternative, we could consider
+ // using the currently-unused 8-byte noscan size class.
+ type weakHandleBox struct {
+ h atomic.Uintptr
+ _ [maxTinySize - unsafe.Sizeof(atomic.Uintptr{})]byte
+ }
+ handle := &(new(weakHandleBox).h)
s.special.kind = _KindSpecialWeakHandle
s.handle = handle
handle.Store(uintptr(p))
@@ -2792,7 +2803,7 @@ func freeSpecial(s *special, p unsafe.Pointer, size uintptr) {
// Cleanups, unlike finalizers, do not resurrect the objects
// they're attached to, so we only need to pass the cleanup
// function, not the object.
- gcCleanups.enqueue(sc.fn)
+ gcCleanups.enqueue(sc.cleanup)
lock(&mheap_.speciallock)
mheap_.specialCleanupAlloc.free(unsafe.Pointer(sc))
unlock(&mheap_.speciallock)
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index e1105afd0f..ff2dec386f 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -746,7 +746,7 @@ func printpanics(p *_panic) {
}
print("panic: ")
printpanicval(p.arg)
- if p.repanicked {
+ if p.recovered && p.repanicked {
print(" [recovered, repanicked]")
} else if p.recovered {
print(" [recovered]")
diff --git a/src/runtime/print.go b/src/runtime/print.go
index c01db9d7f9..d2733fb266 100644
--- a/src/runtime/print.go
+++ b/src/runtime/print.go
@@ -5,7 +5,6 @@
package runtime
import (
- "internal/goarch"
"internal/strconv"
"unsafe"
)
@@ -212,43 +211,3 @@ func printeface(e eface) {
func printiface(i iface) {
print("(", i.tab, ",", i.data, ")")
}
-
-// hexdumpWords prints a word-oriented hex dump of [p, end).
-//
-// If mark != nil, it will be called with each printed word's address
-// and should return a character mark to appear just before that
-// word's value. It can return 0 to indicate no mark.
-func hexdumpWords(p, end uintptr, mark func(uintptr) byte) {
- printlock()
- var markbuf [1]byte
- markbuf[0] = ' '
- minhexdigits = int(unsafe.Sizeof(uintptr(0)) * 2)
- for i := uintptr(0); p+i < end; i += goarch.PtrSize {
- if i%16 == 0 {
- if i != 0 {
- println()
- }
- print(hex(p+i), ": ")
- }
-
- if mark != nil {
- markbuf[0] = mark(p + i)
- if markbuf[0] == 0 {
- markbuf[0] = ' '
- }
- }
- gwrite(markbuf[:])
- val := *(*uintptr)(unsafe.Pointer(p + i))
- print(hex(val))
- print(" ")
-
- // Can we symbolize val?
- fn := findfunc(val)
- if fn.valid() {
- print("<", funcname(fn), "+", hex(val-fn.entry()), "> ")
- }
- }
- minhexdigits = 0
- println()
- printunlock()
-}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 21b276cabf..58fb4bd681 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3120,7 +3120,7 @@ func startm(pp *p, spinning, lockheld bool) {
//go:nowritebarrierrec
func handoffp(pp *p) {
// handoffp must start an M in any situation where
- // findrunnable would return a G to run on pp.
+ // findRunnable would return a G to run on pp.
// if it has local work, start it straight away
if !runqempty(pp) || !sched.runq.empty() {
@@ -3363,7 +3363,7 @@ func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
mp := getg().m
// The conditions here and in handoffp must agree: if
- // findrunnable would return a G to run, handoffp must start
+ // findRunnable would return a G to run, handoffp must start
// an M.
top:
@@ -3587,7 +3587,7 @@ top:
goto top
}
if releasep() != pp {
- throw("findrunnable: wrong p")
+ throw("findRunnable: wrong p")
}
now = pidleput(pp, now)
unlock(&sched.lock)
@@ -3632,7 +3632,7 @@ top:
if mp.spinning {
mp.spinning = false
if sched.nmspinning.Add(-1) < 0 {
- throw("findrunnable: negative nmspinning")
+ throw("findRunnable: negative nmspinning")
}
// Note the for correctness, only the last M transitioning from
@@ -3705,10 +3705,10 @@ top:
if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
sched.pollUntil.Store(pollUntil)
if mp.p != 0 {
- throw("findrunnable: netpoll with p")
+ throw("findRunnable: netpoll with p")
}
if mp.spinning {
- throw("findrunnable: netpoll with spinning")
+ throw("findRunnable: netpoll with spinning")
}
delay := int64(-1)
if pollUntil != 0 {
@@ -3974,7 +3974,7 @@ func checkIdleGCNoP() (*p, *g) {
// timers and the network poller if there isn't one already.
func wakeNetPoller(when int64) {
if sched.lastpoll.Load() == 0 {
- // In findrunnable we ensure that when polling the pollUntil
+ // In findRunnable we ensure that when polling the pollUntil
// field is either zero or the time to which the current
// poll is expected to run. This can have a spurious wakeup
// but should never miss a wakeup.
@@ -3999,7 +3999,7 @@ func resetspinning() {
gp.m.spinning = false
nmspinning := sched.nmspinning.Add(-1)
if nmspinning < 0 {
- throw("findrunnable: negative nmspinning")
+ throw("findRunnable: negative nmspinning")
}
// M wakeup policy is deliberately somewhat conservative, so check if we
// need to wakeup another P here. See "Worker thread parking/unparking"
@@ -4136,11 +4136,23 @@ top:
gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available
+ // May be on a new P.
+ pp = mp.p.ptr()
+
// findRunnable may have collected an allp snapshot. The snapshot is
// only required within findRunnable. Clear it to all GC to collect the
// slice.
mp.clearAllpSnapshot()
+ // If the P was assigned a next GC mark worker but findRunnable
+ // selected anything else, release the worker so another P may run it.
+ //
+ // N.B. If this occurs because a higher-priority goroutine was selected
+ // (trace reader), then tryWakeP is set, which will wake another P to
+ // run the worker. If this occurs because the GC is no longer active,
+ // there is no need to wakep.
+ gcController.releaseNextGCMarkWorker(pp)
+
if debug.dontfreezetheworld > 0 && freezing.Load() {
// See comment in freezetheworld. We don't want to perturb
// scheduler state, so we didn't gcstopm in findRunnable, but
@@ -4659,6 +4671,11 @@ func reentersyscall(pc, sp, bp uintptr) {
gp.m.locks--
}
+// debugExtendGrunningNoP is a debug mode that extends the windows in which
+// we're _Grunning without a P in order to try to shake out bugs with code
+// assuming this state is impossible.
+const debugExtendGrunningNoP = false
+
// Standard syscall entry used by the go syscall library and normal cgo calls.
//
// This is exported via linkname to assembly in the syscall package and x/sys.
@@ -4771,6 +4788,9 @@ func entersyscallblock() {
// <--
// Caution: we're in a small window where we are in _Grunning without a P.
// -->
+ if debugExtendGrunningNoP {
+ usleep(10)
+ }
casgstatus(gp, _Grunning, _Gsyscall)
if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
systemstack(func() {
@@ -4853,6 +4873,9 @@ func exitsyscall() {
// Caution: we're in a window where we may be in _Grunning without a P.
// Either we will grab a P or call exitsyscall0, where we'll switch to
// _Grunnable.
+ if debugExtendGrunningNoP {
+ usleep(10)
+ }
// Grab and clear our old P.
oldp := gp.m.oldp.ptr()
@@ -6026,8 +6049,10 @@ func procresize(nprocs int32) *p {
unlock(&allpLock)
}
+ // Assign Ms to Ps with runnable goroutines.
var runnablePs *p
var runnablePsNeedM *p
+ var idlePs *p
for i := nprocs - 1; i >= 0; i-- {
pp := allp[i]
if gp.m.p.ptr() == pp {
@@ -6035,7 +6060,8 @@ func procresize(nprocs int32) *p {
}
pp.status = _Pidle
if runqempty(pp) {
- pidleput(pp, now)
+ pp.link.set(idlePs)
+ idlePs = pp
continue
}
@@ -6061,6 +6087,8 @@ func procresize(nprocs int32) *p {
pp.link.set(runnablePs)
runnablePs = pp
}
+ // Assign Ms to remaining runnable Ps without usable oldm. See comment
+ // above.
for runnablePsNeedM != nil {
pp := runnablePsNeedM
runnablePsNeedM = pp.link.ptr()
@@ -6071,6 +6099,62 @@ func procresize(nprocs int32) *p {
runnablePs = pp
}
+ // Now that we've assigned Ms to Ps with runnable goroutines, assign GC
+ // mark workers to remaining idle Ps, if needed.
+ //
+ // By assigning GC workers to Ps here, we slightly speed up starting
+ // the world, as we will start enough Ps to run all of the user
+ // goroutines and GC mark workers all at once, rather than using a
+ // sequence of wakep calls as each P's findRunnable realizes it needs
+ // to run a mark worker instead of a user goroutine.
+ //
+ // By assigning GC workers to Ps only _after_ previously-running Ps are
+ // assigned Ms, we ensure that goroutines previously running on a P
+ // continue to run on the same P, with GC mark workers preferring
+ // previously-idle Ps. This helps prevent goroutines from shuffling
+ // around too much across STW.
+ //
+ // N.B., if there aren't enough Ps left in idlePs for all of the GC
+ // mark workers, then findRunnable will still choose to run mark
+ // workers on Ps assigned above.
+ //
+ // N.B., we do this during any STW in the mark phase, not just the
+ // sweep termination STW that starts the mark phase. gcBgMarkWorker
+ // always preempts by removing itself from the P, so even unrelated
+ // STWs during the mark require that Ps reselect mark workers upon
+ // restart.
+ if gcBlackenEnabled != 0 {
+ for idlePs != nil {
+ pp := idlePs
+
+ ok, _ := gcController.assignWaitingGCWorker(pp, now)
+ if !ok {
+ // No more mark workers needed.
+ break
+ }
+
+ // Got a worker, P is now runnable.
+ //
+ // mget may return nil if there aren't enough Ms, in
+ // which case startTheWorldWithSema will start one.
+ //
+ // N.B. findRunnableGCWorker will make the worker G
+ // itself runnable.
+ idlePs = pp.link.ptr()
+ mp := mget()
+ pp.m.set(mp)
+ pp.link.set(runnablePs)
+ runnablePs = pp
+ }
+ }
+
+ // Finally, any remaining Ps are truly idle.
+ for idlePs != nil {
+ pp := idlePs
+ idlePs = pp.link.ptr()
+ pidleput(pp, now)
+ }
+
stealOrder.reset(uint32(nprocs))
var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
@@ -6173,6 +6257,10 @@ func releasepNoTrace() *p {
print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
throw("releasep: invalid p state")
}
+
+ // P must clear its nextGCMarkWorker if it stops.
+ gcController.releaseNextGCMarkWorker(pp)
+
gp.m.p = 0
pp.m = 0
pp.status = _Pidle
@@ -7259,7 +7347,7 @@ func pidlegetSpinning(now int64) (*p, int64) {
pp, now := pidleget(now)
if pp == nil {
- // See "Delicate dance" comment in findrunnable. We found work
+ // See "Delicate dance" comment in findRunnable. We found work
// that we cannot take, we must synchronize with non-spinning
// Ms that may be preparing to drop their P.
sched.needspinning.Store(1)
@@ -7497,23 +7585,36 @@ func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool)
// Try to steal from pp.runnext.
if next := pp.runnext; next != 0 {
if pp.status == _Prunning {
- // Sleep to ensure that pp isn't about to run the g
- // we are about to steal.
- // The important use case here is when the g running
- // on pp ready()s another g and then almost
- // immediately blocks. Instead of stealing runnext
- // in this window, back off to give pp a chance to
- // schedule runnext. This will avoid thrashing gs
- // between different Ps.
- // A sync chan send/recv takes ~50ns as of time of
- // writing, so 3us gives ~50x overshoot.
- if !osHasLowResTimer {
- usleep(3)
- } else {
- // On some platforms system timer granularity is
- // 1-15ms, which is way too much for this
- // optimization. So just yield.
- osyield()
+ if mp := pp.m.ptr(); mp != nil {
+ if gp := mp.curg; gp == nil || readgstatus(gp)&^_Gscan != _Gsyscall {
+ // Sleep to ensure that pp isn't about to run the g
+ // we are about to steal.
+ // The important use case here is when the g running
+ // on pp ready()s another g and then almost
+ // immediately blocks. Instead of stealing runnext
+ // in this window, back off to give pp a chance to
+ // schedule runnext. This will avoid thrashing gs
+ // between different Ps.
+ // A sync chan send/recv takes ~50ns as of time of
+ // writing, so 3us gives ~50x overshoot.
+ // If curg is nil, we assume that the P is likely
+ // to be in the scheduler. If curg isn't nil and isn't
+ // in a syscall, then it's either running, waiting, or
+ // runnable. In this case we want to sleep because the
+ // P might either call into the scheduler soon (running),
+ // or already is (since we found a waiting or runnable
+ // goroutine hanging off of a running P, suggesting it
+ // either recently transitioned out of running, or will
+ // transition to running shortly).
+ if !osHasLowResTimer {
+ usleep(3)
+ } else {
+ // On some platforms system timer granularity is
+ // 1-15ms, which is way too much for this
+ // optimization. So just yield.
+ osyield()
+ }
+ }
}
}
if !pp.runnext.cas(next, 0) {
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index b3084f4895..35a1aeab1f 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -1221,7 +1221,7 @@ func TestTraceSTW(t *testing.T) {
var errors int
for i := range runs {
- err := runTestTracesSTW(t, i)
+ err := runTestTracesSTW(t, i, "TraceSTW", "stop-the-world (read mem stats)")
if err != nil {
t.Logf("Run %d failed: %v", i, err)
errors++
@@ -1235,7 +1235,43 @@ func TestTraceSTW(t *testing.T) {
}
}
-func runTestTracesSTW(t *testing.T, run int) (err error) {
+// TestTraceGCSTW verifies that goroutines continue running on the same M and P
+// after a GC STW.
+func TestTraceGCSTW(t *testing.T) {
+ // Very similar to TestTraceSTW, but using a STW that starts the GC.
+ // When the GC starts, the background GC mark workers start running,
+ // which provide an additional source of disturbance to the scheduler.
+ //
+ // procresize assigns GC workers to previously-idle Ps to avoid
+ // changing what the previously-running Ps are doing.
+
+ if testing.Short() {
+ t.Skip("skipping in -short mode")
+ }
+
+ if runtime.NumCPU() < 8 {
+ t.Skip("This test sets GOMAXPROCS=8 and wants to avoid thread descheduling as much as possible. Skip on machines with less than 8 CPUs")
+ }
+
+ const runs = 50
+
+ var errors int
+ for i := range runs {
+ err := runTestTracesSTW(t, i, "TraceGCSTW", "stop-the-world (GC sweep termination)")
+ if err != nil {
+ t.Logf("Run %d failed: %v", i, err)
+ errors++
+ }
+ }
+
+ pct := float64(errors)/float64(runs)
+ t.Logf("Errors: %d/%d = %f%%", errors, runs, 100*pct)
+ if pct > 0.25 {
+ t.Errorf("Error rate too high")
+ }
+}
+
+func runTestTracesSTW(t *testing.T, run int, name, stwType string) (err error) {
t.Logf("Run %d", run)
// By default, TSAN sleeps for 1s at exit to allow background
@@ -1243,7 +1279,7 @@ func runTestTracesSTW(t *testing.T, run int) (err error) {
// much, since we are running 50 iterations, so disable the sleep.
//
// Outside of race mode, GORACE does nothing.
- buf := []byte(runTestProg(t, "testprog", "TraceSTW", "GORACE=atexit_sleep_ms=0"))
+ buf := []byte(runTestProg(t, "testprog", name, "GORACE=atexit_sleep_ms=0"))
// We locally "fail" the run (return an error) if the trace exhibits
// unwanted scheduling. i.e., the target goroutines did not remain on
@@ -1253,7 +1289,7 @@ func runTestTracesSTW(t *testing.T, run int) (err error) {
// occur, such as a trace parse error.
defer func() {
if err != nil || t.Failed() {
- testtrace.Dump(t, fmt.Sprintf("TestTraceSTW-run%d", run), []byte(buf), false)
+ testtrace.Dump(t, fmt.Sprintf("Test%s-run%d", name, run), []byte(buf), false)
}
}()
@@ -1509,12 +1545,10 @@ findEnd:
break findEnd
case trace.EventRangeBegin:
r := ev.Range()
- if r.Name == "stop-the-world (read mem stats)" {
+ if r.Name == stwType {
// Note when we see the STW begin. This is not
// load bearing; it's purpose is simply to fail
- // the test if we manage to remove the STW from
- // ReadMemStat, so we remember to change this
- // test to add some new source of STW.
+ // the test if we accidentally remove the STW.
stwSeen = true
}
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 6c955460d4..56082bf7f5 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -854,6 +854,18 @@ type p struct {
// mark worker started.
gcMarkWorkerStartTime int64
+ // nextGCMarkWorker is the next mark worker to run. This may be set
+ // during start-the-world to assign a worker to this P. The P runs this
+ // worker on the next call to gcController.findRunnableGCWorker. If the
+ // P runs something else or stops, it must release this worker via
+ // gcController.releaseNextGCMarkWorker.
+ //
+ // See comment in gcBgMarkWorker about the lifetime of
+ // gcBgMarkWorkerNode.
+ //
+ // Only accessed by this P or during STW.
+ nextGCMarkWorker *gcBgMarkWorkerNode
+
// gcw is this P's GC work buffer cache. The work buffer is
// filled by write barriers, drained by mutator assists, and
// disposed on certain GC state transitions.
@@ -1425,9 +1437,9 @@ var (
// must be set. An idle P (passed to pidleput) cannot add new timers while
// idle, so if it has no timers at that time, its mask may be cleared.
//
- // Thus, we get the following effects on timer-stealing in findrunnable:
+ // Thus, we get the following effects on timer-stealing in findRunnable:
//
- // - Idle Ps with no timers when they go idle are never checked in findrunnable
+ // - Idle Ps with no timers when they go idle are never checked in findRunnable
// (for work- or timer-stealing; this is the ideal case).
// - Running Ps must always be checked.
// - Idle Ps whose timers are stolen must continue to be checked until they run
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index e31d5dccb2..a9e8fc1610 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte {
cap := roundupsize(uintptr(len), true)
return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len]
}
+
+// moveSlice copies the input slice to the heap and returns it.
+// et is the element type of the slice.
+func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+ if cap == 0 {
+ if old != nil {
+ old = unsafe.Pointer(&zerobase)
+ }
+ return old, 0, 0
+ }
+ capmem := uintptr(cap) * et.Size_
+ new := mallocgc(capmem, et, true)
+ bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et)
+ memmove(new, old, capmem)
+ return new, len, cap
+}
+
+// moveSliceNoScan is like moveSlice except the element type is known to
+// not have any pointers. We instead pass in the size of the element.
+func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+ if cap == 0 {
+ if old != nil {
+ old = unsafe.Pointer(&zerobase)
+ }
+ return old, 0, 0
+ }
+ capmem := uintptr(cap) * elemSize
+ new := mallocgc(capmem, nil, false)
+ memmove(new, old, capmem)
+ return new, len, cap
+}
+
+// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity
+// for the returned slice.
+// Elements between len and cap in the returned slice will be zeroed.
+func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+ if len == 0 {
+ if old != nil {
+ old = unsafe.Pointer(&zerobase)
+ }
+ return old, 0, 0
+ }
+ lenmem := uintptr(len) * et.Size_
+ capmem := roundupsize(lenmem, false)
+ new := mallocgc(capmem, et, true)
+ bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et)
+ memmove(new, old, lenmem)
+ return new, len, int(capmem / et.Size_)
+}
+
+// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap.
+func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+ if len == 0 {
+ if old != nil {
+ old = unsafe.Pointer(&zerobase)
+ }
+ return old, 0, 0
+ }
+ lenmem := uintptr(len) * elemSize
+ capmem := roundupsize(lenmem, true)
+ new := mallocgc(capmem, nil, false)
+ memmove(new, old, lenmem)
+ if capmem > lenmem {
+ memclrNoHeapPointers(add(new, lenmem), capmem-lenmem)
+ }
+ return new, len, int(capmem / elemSize)
+}
+
+// growsliceBuf is like growslice, but we can use the given buffer
+// as a backing store if we want. bufPtr must be on the stack.
+func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice {
+ if newLen > bufLen {
+ // Doesn't fit, process like a normal growslice.
+ return growslice(oldPtr, newLen, oldCap, num, et)
+ }
+ oldLen := newLen - num
+ if oldPtr != bufPtr && oldLen != 0 {
+ // Move data to start of buffer.
+ // Note: bufPtr is on the stack, so no write barrier needed.
+ memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_)
+ }
+ // Pick a new capacity.
+ //
+ // Unlike growslice, we don't need to double the size each time.
+ // The work done here is not proportional to the length of the slice.
+ // (Unless the memmove happens above, but that is rare, and in any
+ // case there are not many elements on this path.)
+ //
+ // Instead, we try to just bump up to the next size class.
+ // This will ensure that we don't waste any space when we eventually
+ // call moveSlice with the resulting slice.
+ newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_)
+
+ // Zero slice beyond newLen.
+ // The buffer is stack memory, so NoHeapPointers is ok.
+ // Caller will overwrite [oldLen:newLen], so we don't need to zero that portion.
+ // If et.Pointers(), buffer is at least initialized so we don't need to
+ // worry about the caller overwriting junk in [oldLen:newLen].
+ if newLen < newCap {
+ memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_)
+ }
+
+ return slice{bufPtr, newLen, newCap}
+}
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go
index cd2bc26d1e..5463b6c02f 100644
--- a/src/runtime/slice_test.go
+++ b/src/runtime/slice_test.go
@@ -6,6 +6,9 @@ package runtime_test
import (
"fmt"
+ "internal/race"
+ "internal/testenv"
+ "runtime"
"testing"
)
@@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) {
})
}
+
+//go:noinline
+func byteSlice(n int) []byte {
+ var r []byte
+ for i := range n {
+ r = append(r, byte(i))
+ }
+ return r
+}
+func TestAppendByteInLoop(t *testing.T) {
+ testenv.SkipIfOptimizationOff(t)
+ if race.Enabled {
+ t.Skip("skipping in -race mode")
+ }
+ for _, test := range [][3]int{
+ {0, 0, 0},
+ {1, 1, 8},
+ {2, 1, 8},
+ {8, 1, 8},
+ {9, 1, 16},
+ {16, 1, 16},
+ {17, 1, 24},
+ {24, 1, 24},
+ {25, 1, 32},
+ {32, 1, 32},
+ {33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48.
+ {48, 1, 64},
+ {49, 1, 64},
+ {64, 1, 64},
+ {65, 2, 128},
+ } {
+ n := test[0]
+ want := test[1]
+ wantCap := test[2]
+ var r []byte
+ got := testing.AllocsPerRun(10, func() {
+ r = byteSlice(n)
+ })
+ if got != float64(want) {
+ t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+ }
+ if cap(r) != wantCap {
+ t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+ }
+ }
+}
+
+//go:noinline
+func ptrSlice(n int, p *[]*byte) {
+ var r []*byte
+ for range n {
+ r = append(r, nil)
+ }
+ *p = r
+}
+func TestAppendPtrInLoop(t *testing.T) {
+ testenv.SkipIfOptimizationOff(t)
+ if race.Enabled {
+ t.Skip("skipping in -race mode")
+ }
+ var tests [][3]int
+ if runtime.PtrSize == 8 {
+ tests = [][3]int{
+ {0, 0, 0},
+ {1, 1, 1},
+ {2, 1, 2},
+ {3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32.
+ {4, 1, 4},
+ {5, 1, 8},
+ {6, 1, 8},
+ {7, 1, 8},
+ {8, 1, 8},
+ {9, 2, 16},
+ }
+ } else {
+ tests = [][3]int{
+ {0, 0, 0},
+ {1, 1, 2},
+ {2, 1, 2},
+ {3, 1, 4},
+ {4, 1, 4},
+ {5, 1, 6}, // These two are also 24 bytes instead of 32.
+ {6, 1, 6}, //
+ {7, 1, 8},
+ {8, 1, 8},
+ {9, 1, 16},
+ {10, 1, 16},
+ {11, 1, 16},
+ {12, 1, 16},
+ {13, 1, 16},
+ {14, 1, 16},
+ {15, 1, 16},
+ {16, 1, 16},
+ {17, 2, 32},
+ }
+ }
+ for _, test := range tests {
+ n := test[0]
+ want := test[1]
+ wantCap := test[2]
+ var r []*byte
+ got := testing.AllocsPerRun(10, func() {
+ ptrSlice(n, &r)
+ })
+ if got != float64(want) {
+ t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+ }
+ if cap(r) != wantCap {
+ t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+ }
+ }
+}
+
+//go:noinline
+func byteCapSlice(n int) ([]byte, int) {
+ var r []byte
+ for i := range n {
+ r = append(r, byte(i))
+ }
+ return r, cap(r)
+}
+func TestAppendByteCapInLoop(t *testing.T) {
+ testenv.SkipIfOptimizationOff(t)
+ if race.Enabled {
+ t.Skip("skipping in -race mode")
+ }
+ for _, test := range [][3]int{
+ {0, 0, 0},
+ {1, 1, 8},
+ {2, 1, 8},
+ {8, 1, 8},
+ {9, 1, 16},
+ {16, 1, 16},
+ {17, 1, 24},
+ {24, 1, 24},
+ {25, 1, 32},
+ {32, 1, 32},
+ {33, 1, 64},
+ {48, 1, 64},
+ {49, 1, 64},
+ {64, 1, 64},
+ {65, 2, 128},
+ } {
+ n := test[0]
+ want := test[1]
+ wantCap := test[2]
+ var r []byte
+ got := testing.AllocsPerRun(10, func() {
+ r, _ = byteCapSlice(n)
+ })
+ if got != float64(want) {
+ t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+ }
+ if cap(r) != wantCap {
+ t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+ }
+ }
+}
+
+func TestAppendGeneric(t *testing.T) {
+ type I *int
+ r := testAppendGeneric[I](100)
+ if len(r) != 100 {
+ t.Errorf("bad length")
+ }
+}
+
+//go:noinline
+func testAppendGeneric[E any](n int) []E {
+ var r []E
+ var z E
+ for range n {
+ r = append(r, z)
+ }
+ return r
+}
+
+func appendSomeBytes(r []byte, s []byte) []byte {
+ for _, b := range s {
+ r = append(r, b)
+ }
+ return r
+}
+
+func TestAppendOfArg(t *testing.T) {
+ r := make([]byte, 24)
+ for i := 0; i < 24; i++ {
+ r[i] = byte(i)
+ }
+ appendSomeBytes(r, []byte{25, 26, 27})
+ // Do the same thing, trying to overwrite any
+ // stack-allocated buffers used above.
+ s := make([]byte, 24)
+ for i := 0; i < 24; i++ {
+ s[i] = 99
+ }
+ appendSomeBytes(s, []byte{99, 99, 99})
+ // Check that we still have the right data.
+ for i, b := range r {
+ if b != byte(i) {
+ t.Errorf("r[%d]=%d, want %d", i, b, byte(i))
+ }
+ }
+
+}
+
+func BenchmarkAppendInLoop(b *testing.B) {
+ for _, size := range []int{0, 1, 8, 16, 32, 64, 128} {
+ b.Run(fmt.Sprintf("%d", size),
+ func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ byteSlice(size)
+ }
+ })
+ }
+}
+
+func TestMoveToHeapEarly(t *testing.T) {
+ // Just checking that this compiles.
+ var x []int
+ y := x // causes a move2heap in the entry block
+ for range 5 {
+ x = append(x, 5)
+ }
+ _ = y
+}
+
+func TestMoveToHeapCap(t *testing.T) {
+ var c int
+ r := func() []byte {
+ var s []byte
+ for i := range 10 {
+ s = append(s, byte(i))
+ }
+ c = cap(s)
+ return s
+ }()
+ if c != cap(r) {
+ t.Errorf("got cap=%d, want %d", c, cap(r))
+ }
+ sinkSlice = r
+}
+
+//go:noinline
+func runit(f func()) {
+ f()
+}
+
+func TestMoveToHeapClosure1(t *testing.T) {
+ var c int
+ r := func() []byte {
+ var s []byte
+ for i := range 10 {
+ s = append(s, byte(i))
+ }
+ runit(func() {
+ c = cap(s)
+ })
+ return s
+ }()
+ if c != cap(r) {
+ t.Errorf("got cap=%d, want %d", c, cap(r))
+ }
+ sinkSlice = r
+}
+func TestMoveToHeapClosure2(t *testing.T) {
+ var c int
+ r := func() []byte {
+ var s []byte
+ for i := range 10 {
+ s = append(s, byte(i))
+ }
+ c = func() int {
+ return cap(s)
+ }()
+ return s
+ }()
+ if c != cap(r) {
+ t.Errorf("got cap=%d, want %d", c, cap(r))
+ }
+ sinkSlice = r
+}
+
+//go:noinline
+func buildClosure(t *testing.T) ([]byte, func()) {
+ var s []byte
+ for i := range 20 {
+ s = append(s, byte(i))
+ }
+ c := func() {
+ for i, b := range s {
+ if b != byte(i) {
+ t.Errorf("s[%d]=%d, want %d", i, b, i)
+ }
+ }
+ }
+ return s, c
+}
+
+func TestMoveToHeapClosure3(t *testing.T) {
+ _, f := buildClosure(t)
+ overwriteStack(0)
+ f()
+}
+
+//go:noinline
+func overwriteStack(n int) uint64 {
+ var x [100]uint64
+ for i := range x {
+ x[i] = 0xabcdabcdabcdabcd
+ }
+ return x[n]
+}
+
+var sinkSlice []byte
diff --git a/src/runtime/sys_riscv64.go b/src/runtime/sys_riscv64.go
index e710840819..65dc684c33 100644
--- a/src/runtime/sys_riscv64.go
+++ b/src/runtime/sys_riscv64.go
@@ -4,7 +4,12 @@
package runtime
-import "unsafe"
+import (
+ "unsafe"
+
+ "internal/abi"
+ "internal/runtime/sys"
+)
// adjust Gobuf as if it executed a call to fn with context ctxt
// and then did an immediate Gosave.
@@ -12,7 +17,9 @@ func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
if buf.lr != 0 {
throw("invalid use of gostartcall")
}
- buf.lr = buf.pc
+ // Use double the PC quantum on riscv64, so that we retain
+ // four byte alignment and use non-compressed instructions.
+ buf.lr = abi.FuncPCABI0(goexit) + sys.PCQuantum*2
buf.pc = uintptr(fn)
buf.ctxt = ctxt
}
diff --git a/src/runtime/testdata/testprog/crash.go b/src/runtime/testdata/testprog/crash.go
index 556215a71e..fcce388871 100644
--- a/src/runtime/testdata/testprog/crash.go
+++ b/src/runtime/testdata/testprog/crash.go
@@ -22,6 +22,7 @@ func init() {
register("RepanickedPanic", RepanickedPanic)
register("RepanickedMiddlePanic", RepanickedMiddlePanic)
register("RepanickedPanicSandwich", RepanickedPanicSandwich)
+ register("DoublePanicWithSameValue", DoublePanicWithSameValue)
}
func test(name string) {
@@ -189,3 +190,13 @@ func RepanickedPanicSandwich() {
panic("outer")
}()
}
+
+// Double panic with same value and not recovered.
+// See issue 76099.
+func DoublePanicWithSameValue() {
+	var e any = "message"
+	defer func() {
+		panic(e) // re-panic with the identical value while unwinding the first panic
+	}()
+	panic(e)
+}
diff --git a/src/runtime/testdata/testprog/gc.go b/src/runtime/testdata/testprog/gc.go
index bbe1453401..32e2c5e1b4 100644
--- a/src/runtime/testdata/testprog/gc.go
+++ b/src/runtime/testdata/testprog/gc.go
@@ -396,7 +396,7 @@ func gcMemoryLimit(gcPercent int) {
// should do considerably better than this bound.
bound := int64(myLimit + 16<<20)
if runtime.GOOS == "darwin" {
- bound += 16 << 20 // Be more lax on Darwin, see issue 73136.
+ bound += 24 << 20 // Be more lax on Darwin, see issue 73136.
}
start := time.Now()
for time.Since(start) < 200*time.Millisecond {
diff --git a/src/runtime/testdata/testprog/stw_trace.go b/src/runtime/testdata/testprog/stw_trace.go
index 0fed55b875..0fa15da09e 100644
--- a/src/runtime/testdata/testprog/stw_trace.go
+++ b/src/runtime/testdata/testprog/stw_trace.go
@@ -7,15 +7,18 @@ package main
import (
"context"
"log"
+ "math/rand/v2"
"os"
"runtime"
"runtime/debug"
+ "runtime/metrics"
"runtime/trace"
"sync/atomic"
)
func init() {
register("TraceSTW", TraceSTW)
+ register("TraceGCSTW", TraceGCSTW)
}
// The parent writes to ping and waits for the children to write back
@@ -53,7 +56,7 @@ func TraceSTW() {
// https://go.dev/issue/65694). Alternatively, we could just ignore the
// trace if the GC runs.
runtime.GOMAXPROCS(4)
- debug.SetGCPercent(0)
+ debug.SetGCPercent(-1)
if err := trace.Start(os.Stdout); err != nil {
log.Fatalf("failed to start tracing: %v", err)
@@ -86,6 +89,112 @@ func TraceSTW() {
stop.Store(true)
}
+// Variant of TraceSTW for GC STWs. We want the GC mark workers to start on
+// previously-idle Ps, rather than bumping the current P.
+func TraceGCSTW() {
+	ctx := context.Background()
+
+	// The idea here is to have 2 target goroutines that are constantly
+	// running. When the world restarts after STW, we expect these
+	// goroutines to continue execution on the same M and P.
+	//
+	// Set GOMAXPROCS=8 to make room for the 2 target goroutines, 1 parent,
+	// 2 dedicated workers, and a bit of slack.
+	//
+	// Disable the GC initially so we can be sure it only triggers once we
+	// are ready.
+	runtime.GOMAXPROCS(8)
+	debug.SetGCPercent(-1) // -1 disables GC entirely; triggerGC re-enables it later
+
+	if err := trace.Start(os.Stdout); err != nil {
+		log.Fatalf("failed to start tracing: %v", err)
+	}
+	defer trace.Stop()
+
+	for i := range 2 {
+		go traceSTWTarget(i) // busy target goroutines — defined elsewhere in this file
+	}
+
+	// Wait for children to start running.
+	ping.Store(1)
+	for pong[0].Load() != 1 {} // spin until each target acknowledges; NOTE(review): not gofmt-clean on one line
+	for pong[1].Load() != 1 {}
+
+	trace.Log(ctx, "TraceSTW", "start") // NOTE(review): category reuses "TraceSTW" — confirm the trace reader expects this name
+
+	// STW
+	triggerGC()
+
+	// Make sure to run long enough for the children to schedule again
+	// after STW. This is included for good measure, but the goroutines
+	// really ought to have already scheduled since the entire GC
+	// completed.
+	ping.Store(2)
+	for pong[0].Load() != 2 {}
+	for pong[1].Load() != 2 {}
+
+	trace.Log(ctx, "TraceSTW", "end")
+
+	stop.Store(true) // release the target goroutines
+}
+
+func triggerGC() {
+	// Allocate a bunch to trigger the GC rather than using runtime.GC. The
+	// latter blocks until the GC is complete, which is convenient, but
+	// messes with scheduling as it gives this P a chance to steal the
+	// other goroutines before their Ps get up and running again.
+
+	// Bring heap size up prior to enabling the GC to ensure that there is
+	// a decent amount of work in case the GC triggers immediately upon
+	// re-enabling.
+	for range 1000 {
+		alloc()
+	}
+
+	sample := make([]metrics.Sample, 1)
+	sample[0].Name = "/gc/cycles/total:gc-cycles" // count of completed GC cycles
+	metrics.Read(sample)
+
+	start := sample[0].Value.Uint64() // baseline cycle count before re-enabling the GC
+
+	debug.SetGCPercent(100) // restore the default pacer so allocation can trigger a cycle
+
+	// Keep allocating until the GC is complete. We really only need to
+	// continue until the mark workers are scheduled, but there isn't a
+	// good way to measure that.
+	for {
+		metrics.Read(sample)
+		if sample[0].Value.Uint64() != start { // a full GC cycle has finished
+			return
+		}
+
+		alloc()
+	}
+}
+
+// Allocate a tree data structure to generate plenty of scan work for the GC.
+
+type node struct {
+	children []*node // pointer-dense children give the GC pointers to chase
+}
+
+var gcSink node // tree root; package-level so everything below it stays live across GCs
+
+func alloc() {
+	// 10% chance of adding a node at each layer.
+
+	curr := &gcSink
+	for {
+		if len(curr.children) == 0 || rand.Float32() < 0.1 {
+			curr.children = append(curr.children, new(node)) // attach a fresh leaf here
+			return
+		}
+
+		i := rand.IntN(len(curr.children)) // otherwise descend into a uniformly random child
+		curr = curr.children[i]
+	}
+}
+
// Manually insert a morestack call. Leaf functions can omit morestack, but
// non-leaf functions should include them.
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 6649f72471..74aaeba876 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -1366,16 +1366,19 @@ func tracebackHexdump(stk stack, frame *stkframe, bad uintptr) {
// Print the hex dump.
print("stack: frame={sp:", hex(frame.sp), ", fp:", hex(frame.fp), "} stack=[", hex(stk.lo), ",", hex(stk.hi), ")\n")
- hexdumpWords(lo, hi, func(p uintptr) byte {
- switch p {
- case frame.fp:
- return '>'
- case frame.sp:
- return '<'
- case bad:
- return '!'
+ hexdumpWords(lo, hi-lo, func(p uintptr, m hexdumpMarker) {
+ if p == frame.fp {
+ m.start()
+ println("FP")
+ }
+ if p == frame.sp {
+ m.start()
+ println("SP")
+ }
+ if p == bad {
+ m.start()
+ println("bad")
}
- return 0
})
}