diff options
| author | Michael Anthony Knyszek <mknyszek@google.com> | 2025-07-23 17:35:54 +0000 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2025-08-15 11:31:00 -0700 |
| commit | 13df972f6885ebdeba1ea38f0acd99ea0f2bfb49 (patch) | |
| tree | a21583595b6f0e8a29054840d6b04bba04f5fbe4 /src/runtime/metrics_test.go | |
| parent | bd07fafb0a2f979b2be05d9a533182ca55428079 (diff) | |
| download | go-13df972f6885ebdeba1ea38f0acd99ea0f2bfb49.tar.xz | |
runtime/metrics: add metrics for goroutine sched states
This is largely a port of CL 38180.
For #15490.
Change-Id: I2726111e472e81e9f9f0f294df97872c2689f061
Reviewed-on: https://go-review.googlesource.com/c/go/+/690397
Reviewed-by: Michael Pratt <mpratt@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/metrics_test.go')
| -rw-r--r-- | src/runtime/metrics_test.go | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index 5787c96084..5b16cbcb22 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -22,6 +22,7 @@ import ( "strings" "sync" "sync/atomic" + "syscall" "testing" "time" "unsafe" @@ -1575,3 +1576,195 @@ func TestReadMetricsFinalizers(t *testing.T) { t.Errorf("expected %s difference to be exactly %d, got %d -> %d", before[1].Name, N, v0, v1) } } + +func TestReadMetricsSched(t *testing.T) { + const ( + notInGo = iota + runnable + running + waiting + ) + var s [4]metrics.Sample + s[0].Name = "/sched/goroutines/not-in-go:goroutines" + s[1].Name = "/sched/goroutines/runnable:goroutines" + s[2].Name = "/sched/goroutines/running:goroutines" + s[3].Name = "/sched/goroutines/waiting:goroutines" + + logMetrics := func(t *testing.T, s []metrics.Sample) { + for i := range s { + t.Logf("%s: %d", s[i].Name, s[i].Value.Uint64()) + } + } + + // generalSlack is the amount of goroutines we allow ourselves to be + // off by in any given category, either due to background system + // goroutines or testing package goroutines. + const generalSlack = 4 + + // waitingSlack is the max number of blocked goroutines left + // from other tests, the testing package, or system + // goroutines. + const waitingSlack = 100 + + // Make sure GC isn't running, since GC workers interfere with + // expected counts. + defer debug.SetGCPercent(debug.SetGCPercent(-1)) + runtime.GC() + + check := func(t *testing.T, s *metrics.Sample, min, max uint64) { + val := s.Value.Uint64() + if val < min { + t.Errorf("%s too low; %d < %d", s.Name, val, min) + } + if val > max { + t.Errorf("%s too high; %d > %d", s.Name, val, max) + } + } + checkEq := func(t *testing.T, s *metrics.Sample, value uint64) { + check(t, s, value, value) + } + spinUntil := func(f func() bool, timeout time.Duration) bool { + start := time.Now() + for time.Since(start) < timeout { + if f() { + return true + } + time.Sleep(time.Millisecond) + } + return false + } + + // Check base values. + t.Run("base", func(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + metrics.Read(s[:]) + logMetrics(t, s[:]) + check(t, &s[notInGo], 0, generalSlack) + check(t, &s[runnable], 0, generalSlack) + checkEq(t, &s[running], 1) + check(t, &s[waiting], 0, waitingSlack) + }) + + // Force Running count to be high. We'll use these goroutines + // for Runnable, too. + const count = 10 + var ready, exit atomic.Uint32 + for i := 0; i < count-1; i++ { + go func() { + ready.Add(1) + for exit.Load() == 0 { + // Spin to get us and keep us running, but check + // the exit condition so we exit out early if we're + // done. + start := time.Now() + for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 { + } + runtime.Gosched() + } + }() + } + for ready.Load() < count-1 { + runtime.Gosched() + } + + // Be careful. We've entered a dangerous state for platforms + // that do not return back to the underlying system unless all + // goroutines are blocked, like js/wasm, since we have a bunch + // of runnable goroutines all spinning. We cannot write anything + // out. + if testenv.HasParallelism() { + t.Run("running", func(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4)) + // It can take a little bit for the scheduler to + // distribute the goroutines to Ps, so retry for a + // while. + spinUntil(func() bool { + metrics.Read(s[:]) + return s[running].Value.Uint64() >= count + }, time.Second) + logMetrics(t, s[:]) + check(t, &s[running], count, count+4) + }) + + // Force runnable count to be high. + t.Run("runnable", func(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + metrics.Read(s[:]) + logMetrics(t, s[:]) + checkEq(t, &s[running], 1) + check(t, &s[runnable], count-1, count+generalSlack) + }) + + // Done with the running/runnable goroutines. + exit.Store(1) + } else { + // Read metrics and then exit all the other goroutines, + // so that system calls may proceed. + metrics.Read(s[:]) + + // Done with the running/runnable goroutines. + exit.Store(1) + + // Now we can check our invariants. + t.Run("running", func(t *testing.T) { + logMetrics(t, s[:]) + checkEq(t, &s[running], 1) + }) + t.Run("runnable", func(t *testing.T) { + logMetrics(t, s[:]) + check(t, &s[runnable], count-1, count+generalSlack) + }) + } + + // Force not-in-go count to be high. This is a little tricky since + // we try really hard not to let things block in system calls. + // We have to drop to the syscall package to do this reliably. + t.Run("not-in-go", func(t *testing.T) { + // Block a bunch of goroutines on an OS pipe. + pr, pw, err := pipe() + if err != nil { + switch runtime.GOOS { + case "js", "wasip1": + t.Skip("creating pipe:", err) + } + t.Fatal("creating pipe:", err) + } + for i := 0; i < count; i++ { + go syscall.Read(pr, make([]byte, 1)) + } + + // Let the goroutines block. + spinUntil(func() bool { + metrics.Read(s[:]) + return s[notInGo].Value.Uint64() >= count + }, time.Second) + + metrics.Read(s[:]) + logMetrics(t, s[:]) + check(t, &s[notInGo], count, count+generalSlack) + + syscall.Close(pw) + syscall.Close(pr) + }) + + t.Run("waiting", func(t *testing.T) { + // Force waiting count to be high. + const waitingCount = 1000 + stop = make(chan bool) + for i := 0; i < waitingCount; i++ { + go func() { <-stop }() + } + + // Let the goroutines block. + spinUntil(func() bool { + metrics.Read(s[:]) + return s[waiting].Value.Uint64() >= waitingCount + }, time.Second) + + metrics.Read(s[:]) + logMetrics(t, s[:]) + check(t, &s[waiting], waitingCount, waitingCount+waitingSlack) + + close(stop) + }) +} |
