From 50126a8e44f76134349edf5ba3cc94efabc61c80 Mon Sep 17 00:00:00 2001
From: Muhammad
Date: Sun, 1 Mar 2026 23:53:03 +0000
Subject: runtime, cmd/compile: use preemptible memclr for large pointer-free
 clears
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Large memory clearing operations (via clear() or large slice allocation)
currently use non-preemptible assembly loops. This blocks the Garbage
Collector from performing a Stop The World (STW) event, leading to
significant tail latency or even indefinite hangs in tight loops.

This change introduces memclrNoHeapPointersPreemptible, which chunks
clears into 256KB blocks with preemption checks. The compiler's walk
phase is updated to emit this call for large pointer-free clears.

To prevent regressions, SSA rewrite rules are added to ensure that
constant-size clears (which are common and small) continue to be
inlined into OpZero assembly.

Benchmarks on darwin/arm64:
- STW with 50MB clear: Improved from 'Hung' to ~500µs max pause.
- Small clears (5-64B): No measurable regression.
- Large clears (1M-64M): No measurable regression.

Fixes #69327

Change-Id: Ide14d6bcdca1f60d6ac95443acb57da9a8822538
Reviewed-on: https://go-review.googlesource.com/c/go/+/750480
Reviewed-by: Keith Randall
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
Reviewed-by: Robert Griesemer
---
 src/runtime/malloc.go       |  9 +++++++++
 src/runtime/memmove_test.go | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'src/runtime')

diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index c08bc7574b..2144ea602a 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -2202,6 +2202,15 @@ func memclrNoHeapPointersChunked(size uintptr, x unsafe.Pointer) {
 	}
 }
 
+// memclrNoHeapPointersPreemptible is the compiler-callable entry point
+// for clearing large buffers with preemption support. It has the same
+// signature as memclrNoHeapPointers so the compiler can emit calls to it
+// directly. It delegates to memclrNoHeapPointersChunked which splits the
+// work into 256KB chunks with preemption checks between them.
+func memclrNoHeapPointersPreemptible(ptr unsafe.Pointer, n uintptr) {
+	memclrNoHeapPointersChunked(n, ptr)
+}
+
 // implementation of new builtin
 // compiler (both frontend and SSA backend) knows the signature
 // of this function.
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 6065a84553..292dd0f686 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -1374,3 +1374,28 @@ func BenchmarkMemmoveKnownSize1024(b *testing.B) {
 
 	memclrSink = p.x[:]
 }
+
+func BenchmarkSTWLatency(b *testing.B) {
+	const bufSize = 50 << 20 // 50 MiB
+
+	buf := make([]byte, bufSize)
+	var stop atomic.Bool
+	go func() {
+		for !stop.Load() {
+			clear(buf)
+		}
+	}()
+
+	var maxPause int64
+	for i := 0; i < b.N; i++ {
+		start := Nanotime()
+		GC()
+		elapsed := Nanotime() - start
+		if elapsed > maxPause {
+			maxPause = elapsed
+		}
+	}
+	stop.Store(true)
+
+	b.ReportMetric(float64(maxPause)/1e3, "max-pause-µs")
+}
-- 
cgit v1.3-6-g1900