runtime: move TestReadMetricsSched to testprog

There are just too many flakes resulting from background pollution by the testing package and other tests. Run in a subprocess where at least the environment can be more tightly controlled. Fixes #75049. Change-Id: Iad59edaaf31268f1fcb77273f01317d963708fa6 Reviewed-on: https://go-review.googlesource.com/c/go/+/707155 Reviewed-by: Michael Pratt <mpratt@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Michael Knyszek <mknyszek@google.com>
author: Michael Anthony Knyszek <mknyszek@google.com> 2025-09-26 17:05:43 +0000
committer: Gopher Robot <gobot@golang.org> 2025-09-26 10:55:30 -0700
commit: 16ae11a9e1ca064718303f11d30110cf4e4dbbbc (patch)
tree: 31ed4799ae15bdb3c56c7df677a983b0f65b8303 /src/runtime/testdata
parent: 459f3a3adc131d9bb829efe0add27327a29b9c88 (diff)
download: go-16ae11a9e1ca064718303f11d30110cf4e4dbbbc.tar.xz
3 files changed, 295 insertions, 0 deletions
diff --git a/src/runtime/testdata/testprog/pipe_unix.go b/src/runtime/testdata/testprog/pipe_unix.go
new file mode 100644
index 0000000000..cee4da65f6
--- /dev/null
+++ b/src/runtime/testdata/testprog/pipe_unix.go
@@ -0,0 +1,15 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !windows
+
+package main
+
+import "syscall"
+
+func pipe() (r, w int, err error) {
+	var p [2]int
+	err = syscall.Pipe(p[:])
+	return p[0], p[1], err
+}
diff --git a/src/runtime/testdata/testprog/pipe_windows.go b/src/runtime/testdata/testprog/pipe_windows.go
new file mode 100644
index 0000000000..597601a179
--- /dev/null
+++ b/src/runtime/testdata/testprog/pipe_windows.go
@@ -0,0 +1,13 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "syscall"
+
+func pipe() (r, w syscall.Handle, err error) {
+	var p [2]syscall.Handle
+	err = syscall.Pipe(p[:])
+	return p[0], p[1], err
+}
diff --git a/src/runtime/testdata/testprog/schedmetrics.go b/src/runtime/testdata/testprog/schedmetrics.go
new file mode 100644
index 0000000000..6d3f68a848
--- /dev/null
+++ b/src/runtime/testdata/testprog/schedmetrics.go
@@ -0,0 +1,267 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"internal/testenv"
+	"log"
+	"os"
+	"runtime"
+	"runtime/debug"
+	"runtime/metrics"
+	"strings"
+	"sync/atomic"
+	"syscall"
+	"time"
+)
+
+func init() {
+	register("SchedMetrics", SchedMetrics)
+}
+
+// Tests runtime/metrics.Read for various scheduler metrics.
+//
+// Implemented in testprog to prevent other tests from polluting
+// the metrics.
+func SchedMetrics() {
+	const (
+		notInGo = iota
+		runnable
+		running
+		waiting
+		created
+		threads
+		numSamples
+	)
+	var s [numSamples]metrics.Sample
+	s[notInGo].Name = "/sched/goroutines/not-in-go:goroutines"
+	s[runnable].Name = "/sched/goroutines/runnable:goroutines"
+	s[running].Name = "/sched/goroutines/running:goroutines"
+	s[waiting].Name = "/sched/goroutines/waiting:goroutines"
+	s[created].Name = "/sched/goroutines-created:goroutines"
+	s[threads].Name = "/sched/threads/total:threads"
+
+	var failed bool
+	var out bytes.Buffer
+	logger := log.New(&out, "", 0)
+	indent := 0
+	logf := func(s string, a ...any) {
+		var prefix strings.Builder
+		for range indent {
+			prefix.WriteString("\t")
+		}
+		logger.Printf(prefix.String()+s, a...)
+	}
+	errorf := func(s string, a ...any) {
+		logf(s, a...)
+		failed = true
+	}
+	run := func(name string, f func()) {
+		logf("=== Checking %q", name)
+		indent++
+		f()
+		indent--
+	}
+	logMetrics := func(s []metrics.Sample) {
+		for i := range s {
+			logf("%s: %d", s[i].Name, s[i].Value.Uint64())
+		}
+	}
+
+	// generalSlack is the amount of goroutines we allow ourselves to be
+	// off by in any given category, either due to background system
+	// goroutines. This excludes GC goroutines.
+	generalSlack := uint64(4)
+
+	// waitingSlack is the max number of blocked goroutines controlled
+	// by the runtime that we'll allow for. This includes GC goroutines
+	// as well as finalizer and cleanup goroutines.
+	waitingSlack := generalSlack + uint64(2*runtime.GOMAXPROCS(-1))
+
+	// threadsSlack is the maximum number of threads left over
+	// from the runtime (sysmon, the template thread, etc.)
+	const threadsSlack = 4
+
+	// Make sure GC isn't running, since GC workers interfere with
+	// expected counts.
+	defer debug.SetGCPercent(debug.SetGCPercent(-1))
+	runtime.GC()
+
+	check := func(s *metrics.Sample, min, max uint64) {
+		val := s.Value.Uint64()
+		if val < min {
+			errorf("%s too low; %d < %d", s.Name, val, min)
+		}
+		if val > max {
+			errorf("%s too high; %d > %d", s.Name, val, max)
+		}
+	}
+	checkEq := func(s *metrics.Sample, value uint64) {
+		check(s, value, value)
+	}
+	spinUntil := func(f func() bool) bool {
+		for {
+			if f() {
+				return true
+			}
+			time.Sleep(50 * time.Millisecond)
+		}
+	}
+
+	// Check base values.
+	run("base", func() {
+		defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+		metrics.Read(s[:])
+		logMetrics(s[:])
+		check(&s[notInGo], 0, generalSlack)
+		check(&s[runnable], 0, generalSlack)
+		checkEq(&s[running], 1)
+		check(&s[waiting], 0, waitingSlack)
+	})
+
+	metrics.Read(s[:])
+	createdAfterBase := s[created].Value.Uint64()
+
+	// Force Running count to be high. We'll use these goroutines
+	// for Runnable, too.
+	const count = 10
+	var ready, exit atomic.Uint32
+	for range count {
+		go func() {
+			ready.Add(1)
+			for exit.Load() == 0 {
+				// Spin to get us and keep us running, but check
+				// the exit condition so we exit out early if we're
+				// done.
+				start := time.Now()
+				for time.Since(start) < 10*time.Millisecond && exit.Load() == 0 {
+				}
+				runtime.Gosched()
+			}
+		}()
+	}
+	for ready.Load() < count {
+		runtime.Gosched()
+	}
+
+	// Be careful. We've entered a dangerous state for platforms
+	// that do not return back to the underlying system unless all
+	// goroutines are blocked, like js/wasm, since we have a bunch
+	// of runnable goroutines all spinning. We cannot write anything
+	// out.
+	if testenv.HasParallelism() {
+		run("created", func() {
+			metrics.Read(s[:])
+			logMetrics(s[:])
+			checkEq(&s[created], createdAfterBase+count)
+		})
+		run("running", func() {
+			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(count + 4))
+			// It can take a little bit for the scheduler to
+			// distribute the goroutines to Ps, so retry until
+			// we see the count we expect or the test times out.
+			spinUntil(func() bool {
+				metrics.Read(s[:])
+				return s[running].Value.Uint64() >= count
+			})
+			logMetrics(s[:])
+			check(&s[running], count, count+4)
+			check(&s[threads], count, count+4+threadsSlack)
+		})
+
+		// Force runnable count to be high.
+		run("runnable", func() {
+			defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))
+			metrics.Read(s[:])
+			logMetrics(s[:])
+			checkEq(&s[running], 1)
+			check(&s[runnable], count-1, count+generalSlack)
+		})
+
+		// Done with the running/runnable goroutines.
+		exit.Store(1)
+	} else {
+		// Read metrics and then exit all the other goroutines,
+		// so that system calls may proceed.
+		metrics.Read(s[:])
+
+		// Done with the running/runnable goroutines.
+		exit.Store(1)
+
+		// Now we can check our invariants.
+		run("created", func() {
+			// Look for count-1 goroutines because we read metrics
+			// *before* run goroutine was created for this sub-test.
+			checkEq(&s[created], createdAfterBase+count-1)
+		})
+		run("running", func() {
+			logMetrics(s[:])
+			checkEq(&s[running], 1)
+			checkEq(&s[threads], 1)
+		})
+		run("runnable", func() {
+			logMetrics(s[:])
+			check(&s[runnable], count-1, count+generalSlack)
+		})
+	}
+
+	// Force not-in-go count to be high. This is a little tricky since
+	// we try really hard not to let things block in system calls.
+	// We have to drop to the syscall package to do this reliably.
+	run("not-in-go", func() {
+		// Block a bunch of goroutines on an OS pipe.
+		pr, pw, err := pipe()
+		if err != nil {
+			switch runtime.GOOS {
+			case "js", "wasip1":
+				logf("creating pipe: %v", err)
+				return
+			}
+			panic(fmt.Sprintf("creating pipe: %v", err))
+		}
+		for i := 0; i < count; i++ {
+			go syscall.Read(pr, make([]byte, 1))
+		}
+
+		// Let the goroutines block.
+		spinUntil(func() bool {
+			metrics.Read(s[:])
+			return s[notInGo].Value.Uint64() >= count
+		})
+		logMetrics(s[:])
+		check(&s[notInGo], count, count+generalSlack)
+
+		syscall.Close(pw)
+		syscall.Close(pr)
+	})
+
+	run("waiting", func() {
+		// Force waiting count to be high.
+		const waitingCount = 1000
+		stop := make(chan bool)
+		for i := 0; i < waitingCount; i++ {
+			go func() { <-stop }()
+		}
+
+		// Let the goroutines block.
+		spinUntil(func() bool {
+			metrics.Read(s[:])
+			return s[waiting].Value.Uint64() >= waitingCount
+		})
+		logMetrics(s[:])
+		check(&s[waiting], waitingCount, waitingCount+waitingSlack)
+
+		close(stop)
+	})
+
+	if failed {
+		fmt.Fprintln(os.Stderr, out.String())
+		os.Exit(1)
+	} else {
+		fmt.Fprintln(os.Stderr, "OK")
+	}
+}
author	Michael Anthony Knyszek <mknyszek@google.com>	2025-09-26 17:05:43 +0000
committer	Gopher Robot <gobot@golang.org>	2025-09-26 10:55:30 -0700
commit	16ae11a9e1ca064718303f11d30110cf4e4dbbbc (patch)
tree	31ed4799ae15bdb3c56c7df677a983b0f65b8303 /src/runtime/testdata
parent	459f3a3adc131d9bb829efe0add27327a29b9c88 (diff)
download	go-16ae11a9e1ca064718303f11d30110cf4e4dbbbc.tar.xz