aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Ripley <nick.ripley@datadoghq.com>2025-11-21 16:01:29 +0000
committerNick Ripley <nick.ripley@datadoghq.com>2025-11-21 13:25:29 -0800
commit121bc3e464b901327a5c138d8a992bb85c440862 (patch)
treee3fd8bb4869b0033cae4c8be8beef7188d85eddd
parentb604148c4e8ad61640d12cde40379bc3c137d8a6 (diff)
downloadgo-121bc3e464b901327a5c138d8a992bb85c440862.tar.xz
runtime/pprof: remove hard-coded sleep in CPU profile reader
The CPU profiler reader goroutine has a hard-coded 100ms sleep between reads of the CPU profile sample buffer. This is done because waking up the CPU profile reader is not signal-safe on some platforms. As a consequence, stopping the profiler takes 200ms (one iteration to read the last samples and one to see the "eof"), and on many-core systems the reader does not wake up frequently enought to keep up with incoming data. This CL removes the sleep where it is safe to do so, following a suggestion by Austin Clements in the comments on CL 445375. We let the reader fully block, and wake up the reader when the buffer is over half-full. Fixes #63043 Updates #56029 Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f Reviewed-on: https://go-review.googlesource.com/c/go/+/610815 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Mark Freeman <markfreeman@google.com>
-rw-r--r--src/runtime/pprof/pprof.go5
-rw-r--r--src/runtime/profbuf.go25
-rw-r--r--src/runtime/profbuf_test.go76
3 files changed, 103 insertions, 3 deletions
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go
index 78d00af6ca..c617a8b26a 100644
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@@ -924,7 +924,10 @@ func profileWriter(w io.Writer) {
b := newProfileBuilder(w)
var err error
for {
- time.Sleep(100 * time.Millisecond)
+ if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
+ // see runtime_pprof_readProfile
+ time.Sleep(100 * time.Millisecond)
+ }
data, tags, eof := readProfile()
if e := b.addCPUData(data, tags); e != nil && err == nil {
err = e
diff --git a/src/runtime/profbuf.go b/src/runtime/profbuf.go
index 40aff9e1d2..147212206f 100644
--- a/src/runtime/profbuf.go
+++ b/src/runtime/profbuf.go
@@ -38,6 +38,10 @@ import (
// be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
// and rNext is only used by the reader, so it can be accessed without atomics.
//
+// If the reader is blocked waiting for more data, the writer will wake it up if
+// either the buffer is more than half full, or when the writer sets the eof
+// marker or writes overflow entries (described below.)
+//
// If the writer gets ahead of the reader, so that the buffer fills,
// future writes are discarded and replaced in the output stream by an
// overflow entry, which has size 2+hdrsize+1, time set to the time of
@@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u
// Racing with reader setting flag bits in b.w, to avoid lost wakeups.
old := b.w.load()
new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
+ // We re-load b.r here to reduce the likelihood of early wakeups
+ // if the reader already consumed some data between the last
+ // time we read b.r and now. This isn't strictly necessary.
+ unread := countSub(new.dataCount(), b.r.load().dataCount())
+ if unread < 0 {
+ // The new count overflowed and wrapped around.
+ unread += len(b.data)
+ }
+ wakeupThreshold := len(b.data) / 2
+ if unread < wakeupThreshold {
+ // Carry over the sleeping flag since we're not planning
+ // to wake the reader yet
+ new |= old & profReaderSleeping
+ }
if !b.w.cas(old, new) {
continue
}
- // If there was a reader, wake it up.
- if old&profReaderSleeping != 0 {
+ // If we've hit our high watermark for data in the buffer,
+ // and there is a reader, wake it up.
+ if unread >= wakeupThreshold && old&profReaderSleeping != 0 {
+ // NB: if we reach this point, then the sleeping bit is
+ // cleared in the new b.w value
notewakeup(&b.wait)
}
break
diff --git a/src/runtime/profbuf_test.go b/src/runtime/profbuf_test.go
index 7a435c9e5f..2f068ac386 100644
--- a/src/runtime/profbuf_test.go
+++ b/src/runtime/profbuf_test.go
@@ -5,8 +5,12 @@
package runtime_test
import (
+ "fmt"
+ "regexp"
+ "runtime"
. "runtime"
"slices"
+ "sync"
"testing"
"time"
"unsafe"
@@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) {
}
}
}
+
+func TestProfBufWakeup(t *testing.T) {
+ b := NewProfBuf(2, 16, 2)
+ var wg sync.WaitGroup
+ wg.Go(func() {
+ read := 0
+ for {
+ rdata, _, eof := b.Read(ProfBufBlocking)
+ if read == 0 && len(rdata) < 8 {
+ t.Errorf("first wake up at less than half full, got %x", rdata)
+ }
+ read += len(rdata)
+ if eof {
+ return
+ }
+ }
+ })
+
+ // Under the hood profBuf uses notetsleepg when the reader blocks.
+ // Different platforms have different implementations, leading to
+ // different statuses we need to look for to determine whether the
+ // reader is blocked.
+ var waitStatus string
+ switch runtime.GOOS {
+ case "js":
+ waitStatus = "waiting"
+ case "wasip1":
+ waitStatus = "runnable"
+ default:
+ waitStatus = "syscall"
+ }
+
+ // Ensure that the reader is blocked
+ awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+ // NB: this writes 6 words not 4. Fine for the test.
+ // The reader shouldn't wake up for this
+ b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4})
+
+ // The reader should still be blocked
+ //
+ // TODO(nick): this is racy. We could Gosched and still have the reader
+ // blocked in a buggy implementation because it just didn't get a chance
+ // to run
+ awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1")
+ b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8})
+ b.Close()
+
+ // Wait here so we can be sure the reader got the data
+ wg.Wait()
+}
+
+// see also runtime/pprof tests
+func awaitBlockedGoroutine(state, fName string) {
+ re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName)
+ r := regexp.MustCompile(re)
+
+ buf := make([]byte, 64<<10)
+ for {
+ Gosched()
+ n := Stack(buf, true)
+ if n == len(buf) {
+ // Buffer wasn't large enough for a full goroutine dump.
+ // Resize it and try again.
+ buf = make([]byte, 2*len(buf))
+ continue
+ }
+ const count = 1
+ if len(r.FindAll(buf[:n], -1)) >= count {
+ return
+ }
+ }
+}