| field | value | date |
|---|---|---|
| author | Matthew Dempsky <mdempsky@google.com> | 2023-11-20 06:27:20 +0000 |
| committer | Gopher Robot <gobot@golang.org> | 2023-11-20 13:20:29 +0000 |
| commit | 468bc941886ae83b8d16af5deaaf12ea5cabb05b (patch) | |
| tree | 1d008384170d873009c247dcef90da14ac1afb95 /src/runtime/mprof.go | |
| parent | 237715cf7c4c2aabf6d3c04473488c1b2477f8ea (diff) | |
| download | go-468bc941886ae83b8d16af5deaaf12ea5cabb05b.tar.xz | |
Revert "runtime: profile contended lock calls"
This reverts commit go.dev/cl/528657.
Reason for revert: broke a lot of builders.
Change-Id: I70c33062020e997c4df67b3eaa2e886cf0da961e
Reviewed-on: https://go-review.googlesource.com/c/go/+/543660
Reviewed-by: Than McIntosh <thanm@google.com>
Auto-Submit: Matthew Dempsky <mdempsky@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/mprof.go')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/runtime/mprof.go | 236 |

1 file changed, 1 insertion(+), 235 deletions(-)
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index b1930b3020..af461eef1a 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -234,10 +234,6 @@ func newBucket(typ bucketType, nstk int) *bucket {
 // stk returns the slice in b holding the stack.
 func (b *bucket) stk() []uintptr {
 	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
-	if b.nstk > maxStack {
-		// prove that slicing works; otherwise a failure requires a P
-		throw("bad profile stack count")
-	}
 	return stk[:b.nstk:b.nstk]
 }
 
@@ -513,237 +509,7 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) {
 	} else {
 		nstk = gcallers(gp.m.curg, skip, stk[:])
 	}
-
-	saveBlockEventStack(cycles, rate, stk[:nstk], which)
-}
-
-// lockTimer assists with profiling contention on runtime-internal locks.
-//
-// There are several steps between the time that an M experiences contention and
-// when that contention may be added to the profile. This comes from our
-// constraints: We need to keep the critical section of each lock small,
-// especially when those locks are contended. The reporting code cannot acquire
-// new locks until the M has released all other locks, which means no memory
-// allocations and encourages use of (temporary) M-local storage.
-//
-// The M will have space for storing one call stack that caused contention, and
-// for the magnitude of that contention. It will also have space to store the
-// magnitude of additional contention the M caused, since it only has space to
-// remember one call stack and might encounter several contention events before
-// it releases all of its locks and is thus able to transfer the local buffer
-// into the profile.
-//
-// The M will collect the call stack when it unlocks the contended lock. That
-// minimizes the impact on the critical section of the contended lock, and
-// matches the mutex profile's behavior for contention in sync.Mutex: measured
-// at the Unlock method.
-//
-// The profile for contention on sync.Mutex blames the caller of Unlock for the
-// amount of contention experienced by the callers of Lock which had to wait.
-// When there are several critical sections, this allows identifying which of
-// them is responsible.
-//
-// Matching that behavior for runtime-internal locks will require identifying
-// which Ms are blocked on the mutex. The semaphore-based implementation is
-// ready to allow that, but the futex-based implementation will require a bit
-// more work. Until then, we report contention on runtime-internal locks with a
-// call stack taken from the unlock call (like the rest of the user-space
-// "mutex" profile), but assign it a duration value based on how long the
-// previous lock call took (like the user-space "block" profile).
-//
-// Thus, reporting the call stacks of runtime-internal lock contention is
-// guarded by GODEBUG for now. Set GODEBUG=profileruntimelocks=1 to enable.
-//
-// TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment
-//
-// The M will track this by storing a pointer to the lock; lock/unlock pairs for
-// runtime-internal locks are always on the same M.
-//
-// Together, that demands several steps for recording contention. First, when
-// finally acquiring a contended lock, the M decides whether it should plan to
-// profile that event by storing a pointer to the lock in its "to be profiled
-// upon unlock" field. If that field is already set, it uses the relative
-// magnitudes to weight a random choice between itself and the other lock, with
-// the loser's time being added to the "additional contention" field. Otherwise
-// if the M's call stack buffer is occupied, it does the comparison against that
-// sample's magnitude.
-//
-// Second, having unlocked a mutex the M checks to see if it should capture the
-// call stack into its local buffer. Finally, when the M unlocks its last mutex,
-// it transfers the local buffer into the profile. As part of that step, it also
-// transfers any "additional contention" time to the profile. Any lock
-// contention that it experiences while adding samples to the profile will be
-// recorded later as "additional contention" and not include a call stack, to
-// avoid an echo.
-type lockTimer struct {
-	lock      *mutex
-	timeRate  int64
-	timeStart int64
-	tickStart int64
-}
-
-func (lt *lockTimer) begin() {
-	rate := int64(atomic.Load64(&mutexprofilerate))
-
-	lt.timeRate = gTrackingPeriod
-	if rate != 0 && rate < lt.timeRate {
-		lt.timeRate = rate
-	}
-	if int64(fastrand())%lt.timeRate == 0 {
-		lt.timeStart = nanotime()
-	}
-
-	if rate > 0 && int64(fastrand())%rate == 0 {
-		lt.tickStart = cputicks()
-	}
-}
-
-func (lt *lockTimer) end() {
-	gp := getg()
-
-	if lt.timeStart != 0 {
-		nowTime := nanotime()
-		gp.m.mLockProfile.waitTime.Add((nowTime - lt.timeStart) * lt.timeRate)
-	}
-
-	if lt.tickStart != 0 {
-		nowTick := cputicks()
-		gp.m.mLockProfile.recordLock(nowTick-lt.tickStart, lt.lock)
-	}
-}
-
-type mLockProfile struct {
-	waitTime   atomic.Int64      // total nanoseconds spent waiting in runtime.lockWithRank
-	stack      [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank
-	pending    uintptr           // *mutex that experienced contention (to be traceback-ed)
-	cycles     int64             // cycles attributable to "pending" (if set), otherwise to "stack"
-	cyclesLost int64             // contention for which we weren't able to record a call stack
-	disabled   bool              // attribute all time to "lost"
-}
-
-func (prof *mLockProfile) recordLock(cycles int64, l *mutex) {
-	if cycles <= 0 {
-		return
-	}
-
-	if prof.disabled {
-		// We're experiencing contention while attempting to report contention.
-		// Make a note of its magnitude, but don't allow it to be the sole cause
-		// of another contention report.
-		prof.cyclesLost += cycles
-		return
-	}
-
-	if uintptr(unsafe.Pointer(l)) == prof.pending {
-		// Optimization: we'd already planned to profile this same lock (though
-		// possibly from a different unlock site).
-		prof.cycles += cycles
-		return
-	}
-
-	if prev := prof.cycles; prev > 0 {
-		// We can only store one call stack for runtime-internal lock contention
-		// on this M, and we've already got one. Decide which should stay, and
-		// add the other to the report for runtime._LostContendedLock.
-		prevScore := fastrand64() % uint64(prev)
-		thisScore := fastrand64() % uint64(cycles)
-		if prevScore > thisScore {
-			prof.cyclesLost += cycles
-			return
-		} else {
-			prof.cyclesLost += prev
-		}
-	}
-	// Saving the *mutex as a uintptr is safe because:
-	//  - lockrank_on.go does this too, which gives it regular exercise
-	//  - the lock would only move if it's stack allocated, which means it
-	//    cannot experience multi-M contention
-	prof.pending = uintptr(unsafe.Pointer(l))
-	prof.cycles = cycles
-}
-
-// From unlock2, we might not be holding a p in this code.
-//
-//go:nowritebarrierrec
-func (prof *mLockProfile) recordUnlock(l *mutex) {
-	if uintptr(unsafe.Pointer(l)) == prof.pending {
-		prof.captureStack()
-	}
-	if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.cycles != 0 {
-		prof.store()
-	}
-}
-
-func (prof *mLockProfile) captureStack() {
-	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
-	if staticLockRanking {
-		// When static lock ranking is enabled, we'll always be on the system
-		// stack at this point. There will be a runtime.unlockWithRank.func1
-		// frame, and if the call to runtime.unlock took place on a user stack
-		// then there'll also be a runtime.systemstack frame. To keep stack
-		// traces somewhat consistent whether or not static lock ranking is
-		// enabled, we'd like to skip those. But it's hard to tell how long
-		// we've been on the system stack so accept an extra frame in that case,
-		// with a leaf of "runtime.unlockWithRank runtime.unlock" instead of
-		// "runtime.unlock".
-		skip += 1 // runtime.unlockWithRank.func1
-	}
-	prof.pending = 0
-
-	if debug.profileruntimelocks.Load() == 0 {
-		prof.stack[0] = abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum
-		prof.stack[1] = 0
-		return
-	}
-
-	var nstk int
-	gp := getg()
-	sp := getcallersp()
-	pc := getcallerpc()
-	systemstack(func() {
-		var u unwinder
-		u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
-		nstk = tracebackPCs(&u, skip, prof.stack[:])
-	})
-	if nstk < len(prof.stack) {
-		prof.stack[nstk] = 0
-	}
-}
-
-func (prof *mLockProfile) store() {
-	// Report any contention we experience within this function as "lost"; it's
-	// important that the act of reporting a contention event not lead to a
-	// reportable contention event. This also means we can use prof.stack
-	// without copying, since it won't change during this function.
-	mp := acquirem()
-	prof.disabled = true
-
-	nstk := maxStack
-	for i := 0; i < nstk; i++ {
-		if pc := prof.stack[i]; pc == 0 {
-			nstk = i
-			break
-		}
-	}
-
-	cycles, lost := prof.cycles, prof.cyclesLost
-	prof.cycles, prof.cyclesLost = 0, 0
-
-	rate := int64(atomic.Load64(&mutexprofilerate))
-	saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile)
-	if lost > 0 {
-		lostStk := [...]uintptr{
-			abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum,
-		}
-		saveBlockEventStack(lost, rate, lostStk[:], mutexProfile)
-	}
-
-	prof.disabled = false
-	releasem(mp)
-}
-
-func saveBlockEventStack(cycles, rate int64, stk []uintptr, which bucketType) {
-	b := stkbucket(which, 0, stk, true)
+	b := stkbucket(which, 0, stk[:nstk], true)
 	bp := b.bp()
 	lock(&profBlockLock)
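The reverted lockTimer wrapped each runtime-internal lock acquisition in a begin/end pair: begin() samples a timestamp before contending for the lock, and end() charges the measured delay to M-local storage once the lock is held, giving the "mutex" profile a duration in the style of the block profile. Below is a minimal, self-contained analogue in plain Go; timedMutex and its fields are invented for illustration, and the real code also gates its timestamps behind mutexprofilerate-based sampling, which is omitted here.

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

// timedMutex is a hypothetical stand-in for a runtime-internal mutex
// instrumented with the reverted lockTimer pattern.
type timedMutex struct {
	mu       sync.Mutex
	waitTime atomic.Int64 // total nanoseconds spent blocked, like mLockProfile.waitTime
}

// Lock mirrors lockTimer.begin/end: take a timestamp before contending,
// then charge the elapsed wait once the lock has been acquired.
func (m *timedMutex) Lock() {
	start := time.Now()
	m.mu.Lock()
	m.waitTime.Add(time.Since(start).Nanoseconds())
}

func (m *timedMutex) Unlock() { m.mu.Unlock() }

func main() {
	var m timedMutex
	m.Lock()
	done := make(chan struct{})
	go func() {
		time.Sleep(10 * time.Millisecond)
		m.Unlock() // release after ~10ms so the main goroutine's Lock blocks
		close(done)
	}()
	m.Lock() // contended: blocks until the goroutine unlocks
	m.Unlock()
	<-done
	fmt.Printf("time spent blocked: %v\n", time.Duration(m.waitTime.Load()))
}
```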
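The most subtle step above is in recordLock: an M has room for only one pending call stack, so when a second contention event arrives it draws a random score below each event's magnitude, keeps the higher scorer, and credits the loser's cycles to cyclesLost. A standalone sketch of that comparison follows; the sample type and keepOne helper are illustrative names rather than runtime API, and math/rand stands in for the runtime's fastrand64.

```go
package main

import (
	"fmt"
	"math/rand"
)

// sample stands in for one recorded contention event: its magnitude in
// cycles plus, in the real profiler, the call stack that caused it.
type sample struct {
	cycles int64
}

// keepOne mirrors the choice in the reverted recordLock: each event draws
// a uniform score below its own magnitude, which biases the outcome toward
// larger events, and the loser's cycles are returned as "lost" (to be
// reported without a call stack). Both magnitudes are assumed positive,
// as recordLock returns early when cycles <= 0.
func keepOne(prev, next sample) (kept sample, lostCycles int64) {
	prevScore := rand.Int63n(prev.cycles)
	nextScore := rand.Int63n(next.cycles)
	if prevScore > nextScore {
		return prev, next.cycles
	}
	return next, prev.cycles
}

func main() {
	keptLarger := 0
	for i := 0; i < 10000; i++ {
		kept, _ := keepOne(sample{cycles: 100}, sample{cycles: 900})
		if kept.cycles == 900 {
			keptLarger++
		}
	}
	fmt.Printf("larger event kept %d of 10000 trials\n", keptLarger)
}
```

Whichever event survives, the cyclesLost total preserves the overall contention magnitude, so the profile stays roughly weight-accurate even though each M can remember only a single stack.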
