aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/metrics_test.go
diff options
context:
space:
mode:
authorMichael Anthony Knyszek <mknyszek@google.com>2025-07-23 17:35:54 +0000
committerGopher Robot <gobot@golang.org>2025-08-15 11:31:00 -0700
commit13df972f6885ebdeba1ea38f0acd99ea0f2bfb49 (patch)
treea21583595b6f0e8a29054840d6b04bba04f5fbe4 /src/runtime/metrics_test.go
parentbd07fafb0a2f979b2be05d9a533182ca55428079 (diff)
downloadgo-13df972f6885ebdeba1ea38f0acd99ea0f2bfb49.tar.xz
runtime/metrics: add metrics for goroutine sched states
This is largely a port of CL 38180. For #15490.

Change-Id: I2726111e472e81e9f9f0f294df97872c2689f061
Reviewed-on: https://go-review.googlesource.com/c/go/+/690397
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/metrics_test.go')
-rw-r--r--src/runtime/metrics_test.go193
1 files changed, 193 insertions, 0 deletions
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index 5787c96084..5b16cbcb22 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -22,6 +22,7 @@ import (
"strings"
"sync"
"sync/atomic"
+ "syscall"
"testing"
"time"
"unsafe"
@@ -1575,3 +1576,195 @@ func TestReadMetricsFinalizers(t *testing.T) {
t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1)
}
}
+
+func TestReadMetricsSched(t *testing.T) {
+ const (
+ notInGo = iota
+ runnable
+ running
+ waiting
+ )
+ var s [4]metrics.Sample
+ s[0].Name = "/sched/goroutines/not-in-go:goroutines"
+ s[1].Name = "/sched/goroutines/runnable:goroutines"
+ s[2].Name = "/sched/goroutines/running:goroutines"
+ s[3].Name = "/sched/goroutines/waiting:goroutines"
+
+ logMetrics := func(t *testing.T, s []metrics.Sample) {
+ for i := range s {
+ t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64())
+ }
+ }
+
+ // generalSlack is the amount of goroutines we allow ourselves to be
+ // off by in any given category, either due to background system
+ // goroutines or testing package goroutines.
+ const generalSlack = 4
+
+ // waitingSlack is the max number of blocked goroutines left
+ // from other tests, the testing package, or system
+ // goroutines.
+ const waitingSlack = 100
+
+ // Make sure GC isn't running, since GC workers interfere with
+ // expected counts.
+ defer debug.SetGCPercent(debug.SetGCPercent(-1))
+ runtime.GC()
+
+ check := func(t *testing.T, s *metrics.Sample, min, max uint64) {
+ val := s.Value.Uint64()
+ if val < min {
+ t.Errorf("%s too low; %d < %d", s.Name, val, min)
+ }
+ if val > max {
+ t.Errorf("%s too high; %d > %d", s.Name, val, max)
+ }
+ }
+ checkEq := func(t *testing.T, s *metrics.Sample, value uint64) {
+ check(t, s, value, value)
+ }
+ spinUntil := func(f func() bool, timeout time.Duration) bool {
+ start := time.Now()
+ for time.Since(start) < timeout {
+ if f() {
+ return true
+ }
+ time.Sleep(time.Millisecond)
+ }
+ return false
+ }
+
+ // Check base values.
+ t.Run("base", func(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+ metrics.Read(s[:])
+ logMetrics(t, s[:])
+ check(t, &s[notInGo], 0, generalSlack)
+ check(t, &s[runnable], 0, generalSlack)
+ checkEq(t, &s[running], 1)
+ check(t, &s[waiting], 0, waitingSlack)
+ })
+
+ // Force Running count to be high. We'll use these goroutines
+ // for Runnable, too.
+ const count = 10
+ var ready, exit atomic.Uint32
+ for i := 0; i < count-1; i++ {
+ go func() {
+ ready.Add(1)
+ for exit.Load() == 0 {
+ // Spin to get us and keep us running, but check
+ // the exit condition so we exit out early if we're
+ // done.
+ start := time.Now()
+ for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
+ }
+ runtime.Gosched()
+ }
+ }()
+ }
+ for ready.Load() < count-1 {
+ runtime.Gosched()
+ }
+
+ // Be careful. We've entered a dangerous state for platforms
+ // that do not return back to the underlying system unless all
+ // goroutines are blocked, like js/wasm, since we have a bunch
+ // of runnable goroutines all spinning. We cannot write anything
+ // out.
+ if testenv.HasParallelism() {
+ t.Run("running", func(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
+ // It can take a little bit for the scheduler to
+ // distribute the goroutines to Ps, so retry for a
+ // while.
+ spinUntil(func() bool {
+ metrics.Read(s[:])
+ return s[running].Value.Uint64() >= count
+ }, time.Second)
+ logMetrics(t, s[:])
+ check(t, &s[running], count, count+4)
+ })
+
+ // Force runnable count to be high.
+ t.Run("runnable", func(t *testing.T) {
+ defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+ metrics.Read(s[:])
+ logMetrics(t, s[:])
+ checkEq(t, &s[running], 1)
+ check(t, &s[runnable], count-1, count+generalSlack)
+ })
+
+ // Done with the running/runnable goroutines.
+ exit.Store(1)
+ } else {
+ // Read metrics and then exit all the other goroutines,
+ // so that system calls may proceed.
+ metrics.Read(s[:])
+
+ // Done with the running/runnable goroutines.
+ exit.Store(1)
+
+ // Now we can check our invariants.
+ t.Run("running", func(t *testing.T) {
+ logMetrics(t, s[:])
+ checkEq(t, &s[running], 1)
+ })
+ t.Run("runnable", func(t *testing.T) {
+ logMetrics(t, s[:])
+ check(t, &s[runnable], count-1, count+generalSlack)
+ })
+ }
+
+ // Force not-in-go count to be high. This is a little tricky since
+ // we try really hard not to let things block in system calls.
+ // We have to drop to the syscall package to do this reliably.
+ t.Run("not-in-go", func(t *testing.T) {
+ // Block a bunch of goroutines on an OS pipe.
+ pr, pw, err := pipe()
+ if err != nil {
+ switch runtime.GOOS {
+ case "js", "wasip1":
+ t.Skip("creating pipe:", err)
+ }
+ t.Fatal("creating pipe:", err)
+ }
+ for i := 0; i < count; i++ {
+ go syscall.Read(pr, make([]byte, 1))
+ }
+
+ // Let the goroutines block.
+ spinUntil(func() bool {
+ metrics.Read(s[:])
+ return s[notInGo].Value.Uint64() >= count
+ }, time.Second)
+
+ metrics.Read(s[:])
+ logMetrics(t, s[:])
+ check(t, &s[notInGo], count, count+generalSlack)
+
+ syscall.Close(pw)
+ syscall.Close(pr)
+ })
+
+ t.Run("waiting", func(t *testing.T) {
+ // Force waiting count to be high.
+ const waitingCount = 1000
+ stop = make(chan bool)
+ for i := 0; i < waitingCount; i++ {
+ go func() { <-stop }()
+ }
+
+ // Let the goroutines block.
+ spinUntil(func() bool {
+ metrics.Read(s[:])
+ return s[waiting].Value.Uint64() >= waitingCount
+ }, time.Second)
+
+ metrics.Read(s[:])
+ logMetrics(t, s[:])
+ check(t, &s[waiting], waitingCount, waitingCount+waitingSlack)
+
+ close(stop)
+ })
+}