Diffstat (limited to 'src/runtime')
-rw-r--r--  src/runtime/debug.go          11
-rw-r--r--  src/runtime/export_test.go    21
-rw-r--r--  src/runtime/extern.go          8
-rw-r--r--  src/runtime/lock_futex.go      6
-rw-r--r--  src/runtime/lock_sema.go       4
-rw-r--r--  src/runtime/metrics.go         2
-rw-r--r--  src/runtime/metrics_test.go  308
-rw-r--r--  src/runtime/mprof.go         236
-rw-r--r--  src/runtime/proc.go            4
-rw-r--r--  src/runtime/runtime1.go       42
-rw-r--r--  src/runtime/runtime2.go        8
11 files changed, 627 insertions, 23 deletions
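
The patch below wires contention on runtime-internal locks into the existing "mutex" profile and the /sync/mutex/wait/total:seconds metric, gated by GODEBUG=profileruntimelocks=1 and the existing runtime.SetMutexProfileFraction setting. As a rough usage sketch (not part of the change; the output file name and the workload are placeholders), a program built with a toolchain carrying this patch might opt in like this:

// Rough usage sketch (not part of the patch): run the program with
// GODEBUG=profileruntimelocks=1 so the "mutex" profile also carries call
// stacks for contention on runtime-internal locks. The file name and the
// workload are placeholders.
package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

func main() {
	// Record every contention event, as the new test does via
	// runtime.SetMutexProfileFraction(1); larger values sample.
	prev := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(prev)

	// ... workload that contends on locks ...

	// Write the "mutex" profile. With this patch, samples whose stacks end
	// in runtime.unlock (or runtime._LostContendedLock) may appear as well.
	f, err := os.Create("mutex.pprof")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
		panic(err)
	}
}
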
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 0e61692f3d..3233ce8ee7 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -52,6 +52,17 @@ func NumCgoCall() int64 {
 	return n
 }
 
+func totalMutexWaitTimeNanos() int64 {
+	total := sched.totalMutexWaitTime.Load()
+
+	total += sched.totalRuntimeLockWaitTime.Load()
+	for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
+		total += mp.mLockProfile.waitTime.Load()
+	}
+
+	return total
+}
+
 // NumGoroutine returns the number of goroutines that currently exist.
 func NumGoroutine() int {
 	return int(gcount())
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index d2f3563956..5f0e24d4f1 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -1340,6 +1340,27 @@ func PageCachePagesLeaked() (leaked uintptr) {
 	return
 }
 
+type Mutex = mutex
+
+var Lock = lock
+var Unlock = unlock
+
+func MutexContended(l *mutex) bool {
+	switch atomic.Loaduintptr(&l.key) {
+	case 0: // unlocked
+		return false
+	case 1: // locked
+		return false
+	default: // an M is sleeping
+		return true
+	}
+}
+
+func SemRootLock(addr *uint32) *mutex {
+	root := semtable.rootFor(addr)
+	return &root.lock
+}
+
 var Semacquire = semacquire
 var Semrelease1 = semrelease1
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index f8db296e6b..d199720b9b 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -145,6 +145,14 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	risk in that scenario. Currently not supported on Windows, plan9 or js/wasm.
 	Setting this option for some applications can produce large traces, so use with care.
 
+	profileruntimelocks: setting profileruntimelocks=1 includes call stacks related to
+	contention on runtime-internal locks in the "mutex" profile, subject to the
+	MutexProfileFraction setting. The call stacks will correspond to the unlock call that
+	released the lock. But instead of the value corresponding to the amount of contention that
+	call stack caused, it corresponds to the amount of time the caller of unlock had to wait
+	in its original call to lock. A future release is expected to align those and remove this
+	setting.
+
 	invalidptr: invalidptr=1 (the default) causes the garbage collector and stack copier to
 	crash the program if an invalid pointer value (for example, 1) is found in a pointer-typed
 	location. Setting invalidptr=0 disables this check.
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index cc7d465ef1..b4f57d5259 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -71,6 +71,8 @@ func lock2(l *mutex) {
 	// its wakeup call.
 	wait := v
 
+	timer := &lockTimer{lock: l}
+	timer.begin()
 	// On uniprocessors, no point spinning.
 	// On multiprocessors, spin for ACTIVE_SPIN attempts.
 	spin := 0
@@ -82,6 +84,7 @@ func lock2(l *mutex) {
 		for i := 0; i < spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+					timer.end()
 					return
 				}
 			}
@@ -92,6 +95,7 @@ func lock2(l *mutex) {
 		for i := 0; i < passive_spin; i++ {
 			for l.key == mutex_unlocked {
 				if atomic.Cas(key32(&l.key), mutex_unlocked, wait) {
+					timer.end()
 					return
 				}
 			}
@@ -101,6 +105,7 @@
 		// Sleep.
v = atomic.Xchg(key32(&l.key), mutex_sleeping) if v == mutex_unlocked { + timer.end() return } wait = mutex_sleeping @@ -122,6 +127,7 @@ func unlock2(l *mutex) { } gp := getg() + gp.m.mLockProfile.recordUnlock(l) gp.m.locks-- if gp.m.locks < 0 { throw("runtimeĀ·unlock: lock count") diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go index 9afba08b0b..84cd344db8 100644 --- a/src/runtime/lock_sema.go +++ b/src/runtime/lock_sema.go @@ -48,6 +48,8 @@ func lock2(l *mutex) { } semacreate(gp.m) + timer := &lockTimer{lock: l} + timer.begin() // On uniprocessor's, no point spinning. // On multiprocessors, spin for ACTIVE_SPIN attempts. spin := 0 @@ -60,6 +62,7 @@ Loop: if v&locked == 0 { // Unlocked. Try to lock. if atomic.Casuintptr(&l.key, v, v|locked) { + timer.end() return } i = 0 @@ -119,6 +122,7 @@ func unlock2(l *mutex) { } } } + gp.m.mLockProfile.recordUnlock(l) gp.m.locks-- if gp.m.locks < 0 { throw("runtimeĀ·unlock: lock count") diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index 4bd167135d..f97a3804ab 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -470,7 +470,7 @@ func initMetrics() { "/sync/mutex/wait/total:seconds": { compute: func(_ *statAggregate, out *metricValue) { out.kind = metricKindFloat64 - out.scalar = float64bits(nsToSec(sched.totalMutexWaitTime.Load())) + out.scalar = float64bits(nsToSec(totalMutexWaitTimeNanos())) }, }, } diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 1e82897381..56f3340b16 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -6,15 +6,21 @@ package runtime_test import ( "bytes" + "fmt" + "internal/goexperiment" + "internal/profile" "os" "reflect" "runtime" "runtime/debug" "runtime/metrics" + "runtime/pprof" "runtime/trace" + "slices" "sort" "strings" "sync" + "sync/atomic" "testing" "time" "unsafe" @@ -939,3 +945,305 @@ func TestSchedPauseMetrics(t *testing.T) { }) } } + +func TestRuntimeLockMetricsAndProfile(t *testing.T) { + old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests + defer runtime.SetMutexProfileFraction(old) + if old != 0 { + t.Fatalf("need MutexProfileRate 0, got %d", old) + } + + { + before := os.Getenv("GODEBUG") + for _, s := range strings.Split(before, ",") { + if strings.HasPrefix(s, "profileruntimelocks=") { + t.Logf("GODEBUG includes explicit setting %q", s) + } + } + defer func() { os.Setenv("GODEBUG", before) }() + os.Setenv("GODEBUG", fmt.Sprintf("%s,profileruntimelocks=1", before)) + } + + loadProfile := func(t *testing.T) *profile.Profile { + var w bytes.Buffer + pprof.Lookup("mutex").WriteTo(&w, 0) + p, err := profile.Parse(&w) + if err != nil { + t.Fatalf("failed to parse profile: %v", err) + } + if err := p.CheckValid(); err != nil { + t.Fatalf("invalid profile: %v", err) + } + return p + } + + measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) { + beforeProfile := loadProfile(t) + beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} + metrics.Read(beforeMetrics) + + fn() + + afterProfile := loadProfile(t) + afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}} + metrics.Read(afterMetrics) + + sumSamples := func(p *profile.Profile, i int) int64 { + var sum int64 + for _, s := range p.Sample { + sum += s.Value[i] + } + return sum + } + + metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64() + profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * 
time.Nanosecond.Seconds() + + // The internal/profile package does not support compaction; this delta + // profile will include separate positive and negative entries. + p = afterProfile.Copy() + if len(beforeProfile.Sample) > 0 { + err := p.Merge(beforeProfile, -1) + if err != nil { + t.Fatalf("Merge profiles: %v", err) + } + } + + return metricGrowth, profileGrowth, p + } + + testcase := func(stk []string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { + return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) { + metricGrowth, profileGrowth, p := measureDelta(t, func() { + var started, stopped sync.WaitGroup + started.Add(workers) + stopped.Add(workers) + for i := 0; i < workers; i++ { + w := &contentionWorker{ + before: func() { + started.Done() + started.Wait() + }, + after: func() { + stopped.Done() + }, + fn: fn, + } + go w.run() + } + stopped.Wait() + }) + + if profileGrowth == 0 { + t.Errorf("no increase in mutex profile") + } + if metricGrowth == 0 { + t.Errorf("no increase in /sync/mutex/wait/total:seconds metric") + } + // This comparison is possible because the time measurements in support of + // runtime/pprof and runtime/metrics for runtime-internal locks are so close + // together. It doesn't work as well for user-space contention, where the + // involved goroutines are not _Grunnable the whole time and so need to pass + // through the scheduler. + t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth) + t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth) + + if goexperiment.StaticLockRanking { + if !slices.ContainsFunc(stk, func(s string) bool { + return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart" + }) { + // stk is a call stack that is still on the user stack when + // it calls runtime.unlock. Add the extra function that + // we'll see, when the static lock ranking implementation of + // runtime.unlockWithRank switches to the system stack. + stk = append([]string{"runtime.unlockWithRank"}, stk...) + } + } + + var stks [][]string + for _, s := range p.Sample { + var have []string + for _, loc := range s.Location { + for _, line := range loc.Line { + have = append(have, line.Function.Name) + } + } + stks = append(stks, have) + if slices.Equal(have, stk) { + n += s.Value[0] + value += s.Value[1] + } + } + t.Logf("stack %v has samples totaling n=%d value=%d", stk, n, value) + if n == 0 && value == 0 { + t.Logf("profile:\n%s", p) + for _, have := range stks { + t.Logf("have stack %v", have) + } + t.Errorf("want stack %v", stk) + } + + return metricGrowth, profileGrowth, n, value + } + } + + name := t.Name() + + t.Run("runtime.lock", func(t *testing.T) { + mus := make([]runtime.Mutex, 100) + var needContention atomic.Int64 + delay := 10 * time.Microsecond + delayMicros := delay.Microseconds() + + // The goroutine that acquires the lock will only proceed when it + // detects that its partner is contended for the lock. That will lead to + // live-lock if anything (such as a STW) prevents the partner goroutine + // from running. Allowing the contention workers to pause and restart + // (to allow a STW to proceed) makes it harder to confirm that we're + // counting the correct number of contention events, since some locks + // will end up contended twice. Instead, disable the GC. 
+ defer debug.SetGCPercent(debug.SetGCPercent(-1)) + + const workers = 2 + if runtime.GOMAXPROCS(0) < workers { + t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers) + } + + fn := func() bool { + n := int(needContention.Load()) + if n < 0 { + return false + } + mu := &mus[n] + + runtime.Lock(mu) + for int(needContention.Load()) == n { + if runtime.MutexContended(mu) { + // make them wait a little while + for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; { + runtime.Usleep(uint32(delayMicros)) + } + break + } + } + runtime.Unlock(mu) + needContention.Store(int64(n - 1)) + + return true + } + + stk := []string{ + "runtime.unlock", + "runtime_test." + name + ".func5.1", + "runtime_test.(*contentionWorker).run", + } + + t.Run("sample-1", func(t *testing.T) { + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + + needContention.Store(int64(len(mus) - 1)) + metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t) + + if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want { + // The test imposes a delay with usleep, verified with calls to + // nanotime. Compare against the runtime/metrics package's view + // (based on nanotime) rather than runtime/pprof's view (based + // on cputicks). + t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want) + } + if have, want := n, int64(len(mus)); have != want { + t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want) + } + + const slop = 1.5 // account for nanotime vs cputicks + if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth { + t.Errorf("views differ by more than %fx", slop) + } + }) + + t.Run("sample-2", func(t *testing.T) { + old := runtime.SetMutexProfileFraction(2) + defer runtime.SetMutexProfileFraction(old) + + needContention.Store(int64(len(mus) - 1)) + metricGrowth, profileGrowth, n, _ := testcase(stk, workers, fn)(t) + + // With 100 trials and profile fraction of 2, we expect to capture + // 50 samples. Allow the test to pass if we get at least 20 samples; + // the CDF of the binomial distribution says there's less than a + // 1e-9 chance of that, which is an acceptably low flakiness rate. + const samplingSlop = 2.5 + + if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want { + // The test imposes a delay with usleep, verified with calls to + // nanotime. Compare against the runtime/metrics package's view + // (based on nanotime) rather than runtime/pprof's view (based + // on cputicks). 
+ t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want) + } + if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop { + t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want) + } + + const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling + if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth { + t.Errorf("views differ by more than %fx", timerSlop) + } + }) + }) + + t.Run("runtime.semrelease", func(t *testing.T) { + old := runtime.SetMutexProfileFraction(1) + defer runtime.SetMutexProfileFraction(old) + + const workers = 3 + if runtime.GOMAXPROCS(0) < workers { + t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers) + } + + var sem uint32 = 1 + var sawContention atomic.Int32 + var need int32 = 1000 // counteract low timer resolution by requiring more samples + fn := func() bool { + if sawContention.Load() >= need { + return false + } + runtime.Semacquire(&sem) + runtime.Semrelease1(&sem, false, 0) + if runtime.MutexContended(runtime.SemRootLock(&sem)) { + sawContention.Add(1) + } + return true + } + + stk := []string{ + "runtime.unlock", + "runtime.semrelease1", + "runtime_test.TestRuntimeLockMetricsAndProfile.func6.1", + "runtime_test.(*contentionWorker).run", + } + + // Verify that we get call stack we expect, with anything more than zero + // nanoseconds / zero samples. The duration of each contention event is + // too small relative to the expected overhead for us to verify its + // value more directly. Leave that to the explicit lock/unlock test. + + testcase(stk, workers, fn)(t) + }) +} + +// contentionWorker provides cleaner call stacks for lock contention profile tests +type contentionWorker struct { + before func() + fn func() bool + after func() +} + +func (w *contentionWorker) run() { + defer w.after() + w.before() + + for w.fn() { + } +} diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go index af461eef1a..b1930b3020 100644 --- a/src/runtime/mprof.go +++ b/src/runtime/mprof.go @@ -234,6 +234,10 @@ func newBucket(typ bucketType, nstk int) *bucket { // stk returns the slice in b holding the stack. func (b *bucket) stk() []uintptr { stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b))) + if b.nstk > maxStack { + // prove that slicing works; otherwise a failure requires a P + throw("bad profile stack count") + } return stk[:b.nstk:b.nstk] } @@ -509,7 +513,237 @@ func saveblockevent(cycles, rate int64, skip int, which bucketType) { } else { nstk = gcallers(gp.m.curg, skip, stk[:]) } - b := stkbucket(which, 0, stk[:nstk], true) + + saveBlockEventStack(cycles, rate, stk[:nstk], which) +} + +// lockTimer assists with profiling contention on runtime-internal locks. +// +// There are several steps between the time that an M experiences contention and +// when that contention may be added to the profile. This comes from our +// constraints: We need to keep the critical section of each lock small, +// especially when those locks are contended. The reporting code cannot acquire +// new locks until the M has released all other locks, which means no memory +// allocations and encourages use of (temporary) M-local storage. 
+//
+// The M will have space for storing one call stack that caused contention, and
+// for the magnitude of that contention. It will also have space to store the
+// magnitude of additional contention the M caused, since it only has space to
+// remember one call stack and might encounter several contention events before
+// it releases all of its locks and is thus able to transfer the local buffer
+// into the profile.
+//
+// The M will collect the call stack when it unlocks the contended lock. That
+// minimizes the impact on the critical section of the contended lock, and
+// matches the mutex profile's behavior for contention in sync.Mutex: measured
+// at the Unlock method.
+//
+// The profile for contention on sync.Mutex blames the caller of Unlock for the
+// amount of contention experienced by the callers of Lock which had to wait.
+// When there are several critical sections, this allows identifying which of
+// them is responsible.
+//
+// Matching that behavior for runtime-internal locks will require identifying
+// which Ms are blocked on the mutex. The semaphore-based implementation is
+// ready to allow that, but the futex-based implementation will require a bit
+// more work. Until then, we report contention on runtime-internal locks with a
+// call stack taken from the unlock call (like the rest of the user-space
+// "mutex" profile), but assign it a duration value based on how long the
+// previous lock call took (like the user-space "block" profile).
+//
+// Thus, reporting the call stacks of runtime-internal lock contention is
+// guarded by GODEBUG for now. Set GODEBUG=profileruntimelocks=1 to enable.
+//
+// TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment
+//
+// The M will track this by storing a pointer to the lock; lock/unlock pairs for
+// runtime-internal locks are always on the same M.
+//
+// Together, that demands several steps for recording contention. First, when
+// finally acquiring a contended lock, the M decides whether it should plan to
+// profile that event by storing a pointer to the lock in its "to be profiled
+// upon unlock" field. If that field is already set, it uses the relative
+// magnitudes to weight a random choice between itself and the other lock, with
+// the loser's time being added to the "additional contention" field. Otherwise
+// if the M's call stack buffer is occupied, it does the comparison against that
+// sample's magnitude.
+//
+// Second, having unlocked a mutex the M checks to see if it should capture the
+// call stack into its local buffer. Finally, when the M unlocks its last mutex,
+// it transfers the local buffer into the profile. As part of that step, it also
+// transfers any "additional contention" time to the profile. Any lock
+// contention that it experiences while adding samples to the profile will be
+// recorded later as "additional contention" and not include a call stack, to
+// avoid an echo.
+type lockTimer struct { + lock *mutex + timeRate int64 + timeStart int64 + tickStart int64 +} + +func (lt *lockTimer) begin() { + rate := int64(atomic.Load64(&mutexprofilerate)) + + lt.timeRate = gTrackingPeriod + if rate != 0 && rate < lt.timeRate { + lt.timeRate = rate + } + if int64(fastrand())%lt.timeRate == 0 { + lt.timeStart = nanotime() + } + + if rate > 0 && int64(fastrand())%rate == 0 { + lt.tickStart = cputicks() + } +} + +func (lt *lockTimer) end() { + gp := getg() + + if lt.timeStart != 0 { + nowTime := nanotime() + gp.m.mLockProfile.waitTime.Add((nowTime - lt.timeStart) * lt.timeRate) + } + + if lt.tickStart != 0 { + nowTick := cputicks() + gp.m.mLockProfile.recordLock(nowTick-lt.tickStart, lt.lock) + } +} + +type mLockProfile struct { + waitTime atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank + stack [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank + pending uintptr // *mutex that experienced contention (to be traceback-ed) + cycles int64 // cycles attributable to "pending" (if set), otherwise to "stack" + cyclesLost int64 // contention for which we weren't able to record a call stack + disabled bool // attribute all time to "lost" +} + +func (prof *mLockProfile) recordLock(cycles int64, l *mutex) { + if cycles <= 0 { + return + } + + if prof.disabled { + // We're experiencing contention while attempting to report contention. + // Make a note of its magnitude, but don't allow it to be the sole cause + // of another contention report. + prof.cyclesLost += cycles + return + } + + if uintptr(unsafe.Pointer(l)) == prof.pending { + // Optimization: we'd already planned to profile this same lock (though + // possibly from a different unlock site). + prof.cycles += cycles + return + } + + if prev := prof.cycles; prev > 0 { + // We can only store one call stack for runtime-internal lock contention + // on this M, and we've already got one. Decide which should stay, and + // add the other to the report for runtime._LostContendedLock. + prevScore := fastrand64() % uint64(prev) + thisScore := fastrand64() % uint64(cycles) + if prevScore > thisScore { + prof.cyclesLost += cycles + return + } else { + prof.cyclesLost += prev + } + } + // Saving the *mutex as a uintptr is safe because: + // - lockrank_on.go does this too, which gives it regular exercise + // - the lock would only move if it's stack allocated, which means it + // cannot experience multi-M contention + prof.pending = uintptr(unsafe.Pointer(l)) + prof.cycles = cycles +} + +// From unlock2, we might not be holding a p in this code. +// +//go:nowritebarrierrec +func (prof *mLockProfile) recordUnlock(l *mutex) { + if uintptr(unsafe.Pointer(l)) == prof.pending { + prof.captureStack() + } + if gp := getg(); gp.m.locks == 1 && gp.m.mLockProfile.cycles != 0 { + prof.store() + } +} + +func (prof *mLockProfile) captureStack() { + skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank + if staticLockRanking { + // When static lock ranking is enabled, we'll always be on the system + // stack at this point. There will be a runtime.unlockWithRank.func1 + // frame, and if the call to runtime.unlock took place on a user stack + // then there'll also be a runtime.systemstack frame. To keep stack + // traces somewhat consistent whether or not static lock ranking is + // enabled, we'd like to skip those. 
But it's hard to tell how long + // we've been on the system stack so accept an extra frame in that case, + // with a leaf of "runtime.unlockWithRank runtime.unlock" instead of + // "runtime.unlock". + skip += 1 // runtime.unlockWithRank.func1 + } + prof.pending = 0 + + if debug.profileruntimelocks.Load() == 0 { + prof.stack[0] = abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum + prof.stack[1] = 0 + return + } + + var nstk int + gp := getg() + sp := getcallersp() + pc := getcallerpc() + systemstack(func() { + var u unwinder + u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack) + nstk = tracebackPCs(&u, skip, prof.stack[:]) + }) + if nstk < len(prof.stack) { + prof.stack[nstk] = 0 + } +} + +func (prof *mLockProfile) store() { + // Report any contention we experience within this function as "lost"; it's + // important that the act of reporting a contention event not lead to a + // reportable contention event. This also means we can use prof.stack + // without copying, since it won't change during this function. + mp := acquirem() + prof.disabled = true + + nstk := maxStack + for i := 0; i < nstk; i++ { + if pc := prof.stack[i]; pc == 0 { + nstk = i + break + } + } + + cycles, lost := prof.cycles, prof.cyclesLost + prof.cycles, prof.cyclesLost = 0, 0 + + rate := int64(atomic.Load64(&mutexprofilerate)) + saveBlockEventStack(cycles, rate, prof.stack[:nstk], mutexProfile) + if lost > 0 { + lostStk := [...]uintptr{ + abi.FuncPCABIInternal(_LostContendedLock) + sys.PCQuantum, + } + saveBlockEventStack(lost, rate, lostStk[:], mutexProfile) + } + + prof.disabled = false + releasem(mp) +} + +func saveBlockEventStack(cycles, rate int64, stk []uintptr, which bucketType) { + b := stkbucket(which, 0, stk, true) bp := b.bp() lock(&profBlockLock) diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 824b0fa009..6b13d3b2f6 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -919,7 +919,7 @@ func mcommoninit(mp *m, id int64) { // when it is just in a register or thread-local storage. mp.alllink = allm - // NumCgoCall() iterates over allm w/o schedlock, + // NumCgoCall() and others iterate over allm w/o schedlock, // so we need to publish it safely. atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp)) unlock(&sched.lock) @@ -1852,6 +1852,7 @@ found: unlock(&sched.lock) atomic.Xadd64(&ncgocall, int64(mp.ncgocall)) + sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load()) // Release the P. handoffp(releasep()) @@ -5270,6 +5271,7 @@ func _ExternalCode() { _ExternalCode() } func _LostExternalCode() { _LostExternalCode() } func _GC() { _GC() } func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() } +func _LostContendedLock() { _LostContendedLock() } func _VDSO() { _VDSO() } // Called if we receive a SIGPROF signal. diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go index 489dcdd79c..877d94eef2 100644 --- a/src/runtime/runtime1.go +++ b/src/runtime/runtime1.go @@ -307,26 +307,27 @@ type dbgVar struct { // existing int var for that value, which may // already have an initial value. 
var debug struct { - cgocheck int32 - clobberfree int32 - dontfreezetheworld int32 - efence int32 - gccheckmark int32 - gcpacertrace int32 - gcshrinkstackoff int32 - gcstoptheworld int32 - gctrace int32 - invalidptr int32 - madvdontneed int32 // for Linux; issue 28466 - scavtrace int32 - scheddetail int32 - schedtrace int32 - tracebackancestors int32 - asyncpreemptoff int32 - harddecommit int32 - adaptivestackstart int32 - tracefpunwindoff int32 - traceadvanceperiod int32 + cgocheck int32 + clobberfree int32 + dontfreezetheworld int32 + efence int32 + gccheckmark int32 + gcpacertrace int32 + gcshrinkstackoff int32 + gcstoptheworld int32 + gctrace int32 + invalidptr int32 + madvdontneed int32 // for Linux; issue 28466 + profileruntimelocks atomic.Int32 + scavtrace int32 + scheddetail int32 + schedtrace int32 + tracebackancestors int32 + asyncpreemptoff int32 + harddecommit int32 + adaptivestackstart int32 + tracefpunwindoff int32 + traceadvanceperiod int32 // debug.malloc is used as a combined debug check // in the malloc function and should be set @@ -352,6 +353,7 @@ var dbgvars = []*dbgVar{ {name: "gctrace", value: &debug.gctrace}, {name: "invalidptr", value: &debug.invalidptr}, {name: "madvdontneed", value: &debug.madvdontneed}, + {name: "profileruntimelocks", atomic: &debug.profileruntimelocks}, {name: "sbrk", value: &debug.sbrk}, {name: "scavtrace", value: &debug.scavtrace}, {name: "scheddetail", value: &debug.scheddetail}, diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index e64c3c5695..6bdd66766d 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -593,6 +593,8 @@ type m struct { lockedInt uint32 // tracking for internal lockOSThread nextwaitm muintptr // next m waiting for lock + mLockProfile mLockProfile // fields relating to runtime.lock contention + // wait* are used to carry arguments from gopark into park_m, because // there's no stack to put them on. That is their sole purpose. waitunlockf func(*g, unsafe.Pointer) bool @@ -900,6 +902,12 @@ type schedt struct { // stwTotalTimeOther covers the others. stwTotalTimeGC timeHistogram stwTotalTimeOther timeHistogram + + // totalRuntimeLockWaitTime (plus the value of lockWaitTime on each M in + // allm) is the sum of time goroutines have spent in _Grunnable and with an + // M, but waiting for locks within the runtime. This field stores the value + // for Ms that have exited. + totalRuntimeLockWaitTime atomic.Int64 } // Values for the flags field of a sigTabT. |
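
On the metrics side, the new totalMutexWaitTimeNanos folds each M's mLockProfile.waitTime and sched.totalRuntimeLockWaitTime (for exited Ms) into /sync/mutex/wait/total:seconds. A minimal sketch of observing that growth from user code (not part of the patch; it uses only the published runtime/metrics API, and the workload is a placeholder):

// Minimal sketch (not part of the patch) of reading the metric that
// totalMutexWaitTimeNanos now feeds; the workload is a placeholder.
package main

import (
	"fmt"
	"runtime/metrics"
)

func main() {
	const name = "/sync/mutex/wait/total:seconds"
	s := []metrics.Sample{{Name: name}}

	metrics.Read(s)
	before := s[0].Value.Float64()

	// ... workload that contends on sync.Mutex and, with this patch,
	// on runtime-internal locks ...

	metrics.Read(s)
	fmt.Printf("%s grew by %fs\n", name, s[0].Value.Float64()-before)
}
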
