aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/cgroup_linux.go
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2025-05-05 13:44:26 -0400
committerMichael Pratt <mpratt@google.com>2025-05-21 10:21:55 -0700
commite6dacf91ffb0a356aa692ab5c46411e2eef913f3 (patch)
treefc337b1d3fe594503468a6af639ed6459442ee2a /src/runtime/cgroup_linux.go
parentf12c66fbed546645389cf184b0e2ffd6ad9f78ec (diff)
downloadgo-e6dacf91ffb0a356aa692ab5c46411e2eef913f3.tar.xz
runtime: use cgroup CPU limit to set GOMAXPROCS
This CL adds two related features enabled by default via compatibility GODEBUGs containermaxprocs and updatemaxprocs. On Linux, containermaxprocs makes the Go runtime consider cgroup CPU bandwidth limits (quota/period) when setting GOMAXPROCS. If the cgroup limit is lower than the number of logical CPUs available, then the cgroup limit takes precedence. On all OSes, updatemaxprocs makes the Go runtime periodically recalculate the default GOMAXPROCS value and update GOMAXPROCS if it has changed. If GOMAXPROCS is set manually, this update does not occur. This is intended primarily to detect changes to cgroup limits, but it applies on all OSes because the CPU affinity mask can change as well. The runtime only considers the limit in the leaf cgroup (the one that actually contains the process), caching the CPU limit file descriptor(s), which are periodically reread for updates. This is a small departure from the original proposed design. It will not consider limits of parent cgroups (which may be lower than the leaf), and it will not detect cgroup migration after process start. We can consider changing this in the future, but the simpler approach is less invasive; less risk to packages that have some awareness of runtime internals. E.g., if the runtime periodically opens new files during execution, file descriptor leak detection is difficult to implement in a stable way. For #73193. Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64-longtest Change-Id: I6a6a636c631c1ae577fb8254960377ba91c5dc98 Reviewed-on: https://go-review.googlesource.com/c/go/+/670497 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Knyszek <mknyszek@google.com>
Diffstat (limited to 'src/runtime/cgroup_linux.go')
-rw-r--r--src/runtime/cgroup_linux.go119
1 files changed, 119 insertions, 0 deletions
diff --git a/src/runtime/cgroup_linux.go b/src/runtime/cgroup_linux.go
new file mode 100644
index 0000000000..73e7363eb4
--- /dev/null
+++ b/src/runtime/cgroup_linux.go
@@ -0,0 +1,119 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+ "internal/runtime/cgroup"
+)
+
+// cgroup-aware GOMAXPROCS default
+//
+// At startup (defaultGOMAXPROCSInit), we read /proc/self/cgroup and /proc/self/mountinfo
+// to find our current CPU cgroup and open its limit file(s), which remain open
+// for the entire process lifetime. We periodically read the current limit by
+// rereading the limit file(s) from the beginning.
+//
+// This makes reading updated limits simple, but has a few downsides:
+//
+// 1. We only read the limit from the leaf cgroup that actually contains this
+// process. But a parent cgroup may have a tighter limit. That tighter limit
+// would be our effective limit. That said, container runtimes tend to hide
+// parent cgroups from the container anyway.
+//
+// 2. If the process is migrated to another cgroup while it is running it will
+// not notice, as we only check which cgroup we are in once at startup.
+var (
+ // We can't allocate during early initialization when we need to find
+ // the cgroup. Simply use a fixed global as a scratch parsing buffer.
+ cgroupScratch [cgroup.ScratchSize]byte
+
+ cgroupOK bool // set by defaultGOMAXPROCSInit once cgroupCPU has been stored
+ cgroupCPU cgroup.CPU // result of cgroup.OpenCPU; read only when cgroupOK is true
+
+ // defaultGOMAXPROCSInit runs before internal/godebug init, so we can't
+ // directly update the GODEBUG counter. Store the result until after
+ // init runs.
+ containermaxprocsNonDefault bool // set by defaultGOMAXPROCSInit, read by defaultGOMAXPROCSUpdateGODEBUG
+ containermaxprocs = &godebugInc{name: "containermaxprocs"}
+)
+
+// defaultGOMAXPROCSInit prepares the cgroup state used by defaultGOMAXPROCS.
+//
+// Must run after parsedebugvars.
+func defaultGOMAXPROCSInit() {
+ c, err := cgroup.OpenCPU(cgroupScratch[:])
+ if err != nil {
+ // Likely cgroup.ErrNoCgroup. cgroupOK stays false.
+ return
+ }
+
+ if debug.containermaxprocs > 0 {
+ // Normal operation: keep the cgroup for later limit reads.
+ cgroupCPU = c
+ cgroupOK = true
+ return
+ }
+
+ // cgroup-aware GOMAXPROCS is disabled. We still check the cgroup once
+ // at startup to see if enabling the GODEBUG would result in a different
+ // default GOMAXPROCS. If so, defaultGOMAXPROCSUpdateGODEBUG later increments
+ // runtime/metrics /godebug/non-default-behavior/containermaxprocs:events.
+ procs := getCPUCount()
+ cgroupProcs := adjustCgroupGOMAXPROCS(procs, c)
+ if procs != cgroupProcs {
+ containermaxprocsNonDefault = true
+ }
+
+ // Don't need the cgroup for remaining execution.
+ c.Close()
+}
+
+// defaultGOMAXPROCSUpdateGODEBUG reports the containermaxprocs non-default event
+// stored by defaultGOMAXPROCSInit, once internal/godebug is initialized.
+func defaultGOMAXPROCSUpdateGODEBUG() {
+ if containermaxprocsNonDefault {
+ containermaxprocs.IncNonDefault()
+ }
+}
+
+// defaultGOMAXPROCS returns the default GOMAXPROCS for when it has not been set explicitly.
+//
+// ncpu is the optional precomputed value of getCPUCount. If passed as 0 (or any
+// non-positive value), defaultGOMAXPROCS will call getCPUCount.
+func defaultGOMAXPROCS(ncpu int32) int32 {
+ // GOMAXPROCS is the minimum of:
+ //
+ // 1. Total number of logical CPUs available from sched_getaffinity.
+ //
+ // 2. The average CPU cgroup throughput limit (average throughput =
+ // quota/period). A limit less than 2 is rounded up to 2, and any
+ // fractional component is rounded up.
+ //
+ // TODO: add rationale.
+
+ procs := ncpu
+ if procs <= 0 {
+ procs = getCPUCount()
+ }
+ if !cgroupOK {
+ // No cgroup, or disabled by debug.containermaxprocs.
+ return procs
+ }
+
+ return adjustCgroupGOMAXPROCS(procs, cgroupCPU)
+}
+
+// adjustCgroupGOMAXPROCS returns procs, lowered to the cgroup CPU limit (rounded up, minimum 2) if that is smaller.
+func adjustCgroupGOMAXPROCS(procs int32, cpu cgroup.CPU) int32 {
+ limit, ok, err := cgroup.ReadCPULimit(cpu)
+ if err == nil && ok { // otherwise procs is returned unchanged
+ limit = ceil(limit) // round any fractional CPU up
+ limit = max(limit, 2) // never go below 2
+ if int32(limit) < procs { // NOTE(review): assumes limit fits in int32 — confirm for very large quotas
+ procs = int32(limit)
+ }
+ }
+ return procs
+}