diff options
| author | Nick Ripley <nick.ripley@datadoghq.com> | 2025-11-21 16:01:29 +0000 |
|---|---|---|
| committer | Nick Ripley <nick.ripley@datadoghq.com> | 2025-11-21 13:25:29 -0800 |
| commit | 121bc3e464b901327a5c138d8a992bb85c440862 (patch) | |
| tree | e3fd8bb4869b0033cae4c8be8beef7188d85eddd | |
| parent | b604148c4e8ad61640d12cde40379bc3c137d8a6 (diff) | |
| download | go-121bc3e464b901327a5c138d8a992bb85c440862.tar.xz | |
runtime/pprof: remove hard-coded sleep in CPU profile reader
The CPU profiler reader goroutine has a hard-coded 100ms sleep between
reads of the CPU profile sample buffer. This is done because waking up
the CPU profile reader is not signal-safe on some platforms. As a
consequence, stopping the profiler takes 200ms (one iteration to read
the last samples and one to see the "eof"), and on many-core systems the
reader does not wake up frequently enought to keep up with incoming
data.
This CL removes the sleep where it is safe to do so, following a
suggestion by Austin Clements in the comments on CL 445375. We let the
reader fully block, and wake up the reader when the buffer is over
half-full.
Fixes #63043
Updates #56029
Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest,gotip-linux-arm64-longtest,gotip-linux-386-longtest
Change-Id: I9f7e7e9918a4a6f16e80f6aaf33103126568a81f
Reviewed-on: https://go-review.googlesource.com/c/go/+/610815
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
| -rw-r--r-- | src/runtime/pprof/pprof.go | 5 | ||||
| -rw-r--r-- | src/runtime/profbuf.go | 25 | ||||
| -rw-r--r-- | src/runtime/profbuf_test.go | 76 |
3 files changed, 103 insertions, 3 deletions
diff --git a/src/runtime/pprof/pprof.go b/src/runtime/pprof/pprof.go index 78d00af6ca..c617a8b26a 100644 --- a/src/runtime/pprof/pprof.go +++ b/src/runtime/pprof/pprof.go @@ -924,7 +924,10 @@ func profileWriter(w io.Writer) { b := newProfileBuilder(w) var err error for { - time.Sleep(100 * time.Millisecond) + if runtime.GOOS == "darwin" || runtime.GOOS == "ios" { + // see runtime_pprof_readProfile + time.Sleep(100 * time.Millisecond) + } data, tags, eof := readProfile() if e := b.addCPUData(data, tags); e != nil && err == nil { err = e diff --git a/src/runtime/profbuf.go b/src/runtime/profbuf.go index 40aff9e1d2..147212206f 100644 --- a/src/runtime/profbuf.go +++ b/src/runtime/profbuf.go @@ -38,6 +38,10 @@ import ( // be returned by read. By definition, r ≤ rNext ≤ w (before wraparound), // and rNext is only used by the reader, so it can be accessed without atomics. // +// If the reader is blocked waiting for more data, the writer will wake it up if +// either the buffer is more than half full, or when the writer sets the eof +// marker or writes overflow entries (described below.) +// // If the writer gets ahead of the reader, so that the buffer fills, // future writes are discarded and replaced in the output stream by an // overflow entry, which has size 2+hdrsize+1, time set to the time of @@ -378,11 +382,28 @@ func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []u // Racing with reader setting flag bits in b.w, to avoid lost wakeups. old := b.w.load() new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1) + // We re-load b.r here to reduce the likelihood of early wakeups + // if the reader already consumed some data between the last + // time we read b.r and now. This isn't strictly necessary. + unread := countSub(new.dataCount(), b.r.load().dataCount()) + if unread < 0 { + // The new count overflowed and wrapped around. + unread += len(b.data) + } + wakeupThreshold := len(b.data) / 2 + if unread < wakeupThreshold { + // Carry over the sleeping flag since we're not planning + // to wake the reader yet + new |= old & profReaderSleeping + } if !b.w.cas(old, new) { continue } - // If there was a reader, wake it up. - if old&profReaderSleeping != 0 { + // If we've hit our high watermark for data in the buffer, + // and there is a reader, wake it up. + if unread >= wakeupThreshold && old&profReaderSleeping != 0 { + // NB: if we reach this point, then the sleeping bit is + // cleared in the new b.w value notewakeup(&b.wait) } break diff --git a/src/runtime/profbuf_test.go b/src/runtime/profbuf_test.go index 7a435c9e5f..2f068ac386 100644 --- a/src/runtime/profbuf_test.go +++ b/src/runtime/profbuf_test.go @@ -5,8 +5,12 @@ package runtime_test import ( + "fmt" + "regexp" + "runtime" . "runtime" "slices" + "sync" "testing" "time" "unsafe" @@ -190,3 +194,75 @@ func TestProfBufDoubleWakeup(t *testing.T) { } } } + +func TestProfBufWakeup(t *testing.T) { + b := NewProfBuf(2, 16, 2) + var wg sync.WaitGroup + wg.Go(func() { + read := 0 + for { + rdata, _, eof := b.Read(ProfBufBlocking) + if read == 0 && len(rdata) < 8 { + t.Errorf("first wake up at less than half full, got %x", rdata) + } + read += len(rdata) + if eof { + return + } + } + }) + + // Under the hood profBuf uses notetsleepg when the reader blocks. + // Different platforms have different implementations, leading to + // different statuses we need to look for to determine whether the + // reader is blocked. + var waitStatus string + switch runtime.GOOS { + case "js": + waitStatus = "waiting" + case "wasip1": + waitStatus = "runnable" + default: + waitStatus = "syscall" + } + + // Ensure that the reader is blocked + awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1") + // NB: this writes 6 words not 4. Fine for the test. + // The reader shouldn't wake up for this + b.Write(nil, 1, []uint64{1, 2}, []uintptr{3, 4}) + + // The reader should still be blocked + // + // TODO(nick): this is racy. We could Gosched and still have the reader + // blocked in a buggy implementation because it just didn't get a chance + // to run + awaitBlockedGoroutine(waitStatus, "TestProfBufWakeup.func1") + b.Write(nil, 1, []uint64{5, 6}, []uintptr{7, 8}) + b.Close() + + // Wait here so we can be sure the reader got the data + wg.Wait() +} + +// see also runtime/pprof tests +func awaitBlockedGoroutine(state, fName string) { + re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime_test\.%s`, regexp.QuoteMeta(state), fName) + r := regexp.MustCompile(re) + + buf := make([]byte, 64<<10) + for { + Gosched() + n := Stack(buf, true) + if n == len(buf) { + // Buffer wasn't large enough for a full goroutine dump. + // Resize it and try again. + buf = make([]byte, 2*len(buf)) + continue + } + const count = 1 + if len(r.FindAll(buf[:n], -1)) >= count { + return + } + } +} |
