aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/mprof.go
diff options
context:
space:
mode:
author: Michael Pratt <mpratt@google.com> 2023-10-10 15:28:32 -0400
committer: Gopher Robot <gobot@golang.org> 2023-11-15 16:49:45 +0000
commit: 6ef98ac87c8a4185c0bace496d84cb3b68f069e3 (patch)
tree: 055a940a2beb55ca09c411bdf9accb49f054f7aa /src/runtime/mprof.go
parent: a0df23888fb30c82d8c54c24212442bf56211769 (diff)
download: go-6ef98ac87c8a4185c0bace496d84cb3b68f069e3.tar.xz
runtime/metrics: add STW stopping and total time metrics
This CL adds four new time histogram metrics: /sched/pauses/stopping/gc:seconds, /sched/pauses/stopping/other:seconds, /sched/pauses/total/gc:seconds, and /sched/pauses/total/other:seconds. The "stopping" metrics measure the time taken to start a stop-the-world pause, i.e., how long it takes stopTheWorldWithSema to stop all Ps. This can be used to detect STW struggling to preempt Ps. The "total" metrics measure the total duration of a stop-the-world pause, from starting to stop the world until the world is started again. This includes the time spent in the "stopping" phase. The "gc" metrics are used for GC-related STW pauses. The "other" metrics are used for all other STW pauses. All of these metrics start timing in stopTheWorldWithSema only after successfully acquiring sched.lock, thus excluding lock contention on sched.lock. The reasoning behind this is that while waiting on sched.lock the world is not stopped at all (all other Ps can run), so the impact of this contention is primarily limited to the goroutine attempting to stop-the-world. Additionally, we already have some visibility into sched.lock contention via contention profiles (#57071). /sched/pauses/total/gc:seconds is conceptually equivalent to /gc/pauses:seconds, so the latter is marked as deprecated and returns the same histogram as the former. In the implementation, there are a few minor differences: * For both mark and sweep termination stops, /gc/pauses:seconds started timing prior to calling stopTheWorldWithSema, thus including lock contention. These details are minor enough that I do not believe the slight change in reporting will matter. For mark termination stops, moving timing stop into startTheWorldWithSema does have the side effect of requiring moving other GC metric calculations outside of the STW, as they depend on the same end time.
Fixes #63340 Change-Id: Iacd0bab11bedab85d3dcfb982361413a7d9c0d05 Reviewed-on: https://go-review.googlesource.com/c/go/+/534161 Reviewed-by: Michael Knyszek <mknyszek@google.com> Auto-Submit: Michael Pratt <mpratt@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/mprof.go')
-rw-r--r-- src/runtime/mprof.go 19
1 files changed, 10 insertions, 9 deletions
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index ef485a3cfc..af461eef1a 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -902,7 +902,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
ourg := getg()
- stopTheWorld(stwGoroutineProfile)
+ stw := stopTheWorld(stwGoroutineProfile)
// Using gcount while the world is stopped should give us a consistent view
// of the number of live goroutines, minus the number of goroutines that are
// alive and permanently marked as "system". But to make this count agree
@@ -919,7 +919,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
// There's not enough space in p to store the whole profile, so (per the
// contract of runtime.GoroutineProfile) we're not allowed to write to p
// at all and must return n, false.
- startTheWorld()
+ startTheWorld(stw)
semrelease(&goroutineProfile.sema)
return n, false
}
@@ -953,7 +953,7 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
doRecordGoroutineProfile(fing)
}
}
- startTheWorld()
+ startTheWorld(stw)
// Visit each goroutine that existed as of the startTheWorld call above.
//
@@ -970,12 +970,12 @@ func goroutineProfileWithLabelsConcurrent(p []StackRecord, labels []unsafe.Point
tryRecordGoroutineProfile(gp1, Gosched)
})
- stopTheWorld(stwGoroutineProfileCleanup)
+ stw = stopTheWorld(stwGoroutineProfileCleanup)
endOffset := goroutineProfile.offset.Swap(0)
goroutineProfile.active = false
goroutineProfile.records = nil
goroutineProfile.labels = nil
- startTheWorld()
+ startTheWorld(stw)
// Restore the invariant that every goroutine struct in allgs has its
// goroutineProfiled field cleared.
@@ -1105,7 +1105,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
return gp1 != gp && readgstatus(gp1) != _Gdead && !isSystemGoroutine(gp1, false)
}
- stopTheWorld(stwGoroutineProfile)
+ stw := stopTheWorld(stwGoroutineProfile)
// World is stopped, no locking required.
n = 1
@@ -1161,7 +1161,7 @@ func goroutineProfileWithLabelsSync(p []StackRecord, labels []unsafe.Pointer) (n
raceacquire(unsafe.Pointer(&labelSync))
}
- startTheWorld()
+ startTheWorld(stw)
return n, ok
}
@@ -1190,8 +1190,9 @@ func saveg(pc, sp uintptr, gp *g, r *StackRecord) {
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func Stack(buf []byte, all bool) int {
+ var stw worldStop
if all {
- stopTheWorld(stwAllGoroutinesStack)
+ stw = stopTheWorld(stwAllGoroutinesStack)
}
n := 0
@@ -1218,7 +1219,7 @@ func Stack(buf []byte, all bool) int {
}
if all {
- startTheWorld()
+ startTheWorld(stw)
}
return n
}