diff options
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/base/base.go | 194 | ||||
| -rw-r--r-- | src/cmd/compile/internal/base/debug.go | 1 | ||||
| -rw-r--r-- | src/cmd/compile/internal/base/startheap.go | 272 | ||||
| -rw-r--r-- | src/cmd/compile/internal/gc/main.go | 11 |
4 files changed, 280 insertions, 198 deletions
diff --git a/src/cmd/compile/internal/base/base.go b/src/cmd/compile/internal/base/base.go index ee3772c5ca..405c7938a5 100644 --- a/src/cmd/compile/internal/base/base.go +++ b/src/cmd/compile/internal/base/base.go @@ -5,11 +5,7 @@ package base import ( - "fmt" "os" - "runtime" - "runtime/debug" - "runtime/metrics" ) var atExitFuncs []func() @@ -29,193 +25,3 @@ func Exit(code int) { // To enable tracing support (-t flag), set EnableTrace to true. const EnableTrace = false - -// forEachGC calls fn each GC cycle until it returns false. -func forEachGC(fn func() bool) { - type T [32]byte // large enough to avoid runtime's tiny object allocator - - var finalizer func(*T) - finalizer = func(p *T) { - if fn() { - runtime.SetFinalizer(p, finalizer) - } - } - - finalizer(new(T)) -} - -// AdjustStartingHeap modifies GOGC so that GC should not occur until the heap -// grows to the requested size. This is intended but not promised, though it -// is true-mostly, depending on when the adjustment occurs and on the -// compiler's input and behavior. Once this size is approximately reached -// GOGC is reset to 100; subsequent GCs may reduce the heap below the requested -// size, but this function does not affect that. -// -// -d=gcadjust=1 enables logging of GOGC adjustment events. -// -// NOTE: If you think this code would help startup time in your own -// application and you decide to use it, please benchmark first to see if it -// actually works for you (it may not: the Go compiler is not typical), and -// whatever the outcome, please leave a comment on bug #56546. This code -// uses supported interfaces, but depends more than we like on -// current+observed behavior of the garbage collector, so if many people need -// this feature, we should consider/propose a better way to accomplish it. 
-func AdjustStartingHeap(requestedHeapGoal uint64) { - logHeapTweaks := Debug.GCAdjust == 1 - mp := runtime.GOMAXPROCS(0) - gcConcurrency := Flag.LowerC - - const ( - goal = "/gc/heap/goal:bytes" - count = "/gc/cycles/total:gc-cycles" - allocs = "/gc/heap/allocs:bytes" - frees = "/gc/heap/frees:bytes" - ) - - sample := []metrics.Sample{{Name: goal}, {Name: count}, {Name: allocs}, {Name: frees}} - const ( - GOAL = 0 - COUNT = 1 - ALLOCS = 2 - FREES = 3 - ) - - // Assumptions and observations of Go's garbage collector, as of Go 1.17-1.20: - - // - the initial heap goal is 4M, by fiat. It is possible for Go to start - // with a heap as small as 512k, so this may change in the future. - - // - except for the first heap goal, heap goal is a function of - // observed-live at the previous GC and current GOGC. After the first - // GC, adjusting GOGC immediately updates GOGC; before the first GC, - // adjusting GOGC does not modify goal (but the change takes effect after - // the first GC). - - // - the before/after first GC behavior is not guaranteed anywhere, it's - // just behavior, and it's a bad idea to rely on it. - - // - we don't know exactly when GC will run, even after we adjust GOGC; the - // first GC may not have happened yet, may have already happened, or may - // be currently in progress, and GCs can start for several reasons. - - // - forEachGC above will run the provided function at some delay after each - // GC's mark phase terminates; finalizers are run after marking as the - // spans containing finalizable objects are swept, driven by GC - // background activity and allocation demand. - - // - "live at last GC" is not available through the current metrics - // interface. 
Instead, live is estimated by knowing the adjusted value of - // GOGC and the new heap goal following a GC (this requires knowing that - // at least one GC has occurred): - // estLive = 100 * newGoal / (100 + currentGogc) - // this new value of GOGC - // newGogc = 100*requestedHeapGoal/estLive - 100 - // will result in the desired goal. The logging code checks that the - // resulting goal is correct. - - // There's a small risk that the finalizer will be slow to run after a GC - // that expands the goal to a huge value, and that this will lead to - // out-of-memory. This doesn't seem to happen; in experiments on a variety - // of machines with a variety of extra loads to disrupt scheduling, the - // worst overshoot observed was 50% past requestedHeapGoal. - - metrics.Read(sample) - for _, s := range sample { - if s.Value.Kind() == metrics.KindBad { - // Just return, a slightly slower compilation is a tolerable outcome. - if logHeapTweaks { - fmt.Fprintf(os.Stderr, "GCAdjust: Regret unexpected KindBad for metric %s\n", s.Name) - } - return - } - } - - // Tinker with GOGC to make the heap grow rapidly at first. - currentGoal := sample[GOAL].Value.Uint64() // Believe this will be 4MByte or less, perhaps 512k - myGogc := 100 * requestedHeapGoal / currentGoal - if myGogc <= 150 { - return - } - - if logHeapTweaks { - sample := append([]metrics.Sample(nil), sample...) 
// avoid races with GC callback - AtExit(func() { - metrics.Read(sample) - goal := sample[GOAL].Value.Uint64() - count := sample[COUNT].Value.Uint64() - oldGogc := debug.SetGCPercent(100) - if oldGogc == 100 { - fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %d gogc %d count %d maxprocs %d gcConcurrency %d\n", - goal, oldGogc, count, mp, gcConcurrency) - } else { - inUse := sample[ALLOCS].Value.Uint64() - sample[FREES].Value.Uint64() - overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) - fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %d gogc %d count %d maxprocs %d gcConcurrency %d overPct %d\n", - goal, oldGogc, count, mp, gcConcurrency, overPct) - - } - }) - } - - debug.SetGCPercent(int(myGogc)) - - adjustFunc := func() bool { - - metrics.Read(sample) - goal := sample[GOAL].Value.Uint64() - count := sample[COUNT].Value.Uint64() - - if goal <= requestedHeapGoal { // Stay the course - if logHeapTweaks { - fmt.Fprintf(os.Stderr, "GCAdjust: Reuse GOGC adjust, current goal %d, count is %d, current gogc %d\n", - goal, count, myGogc) - } - return true - } - - // Believe goal has been adjusted upwards, else it would be less-than-or-equal than requestedHeapGoal - calcLive := 100 * goal / (100 + myGogc) - - if 2*calcLive < requestedHeapGoal { // calcLive can exceed requestedHeapGoal! - myGogc = 100*requestedHeapGoal/calcLive - 100 - - if myGogc > 125 { - // Not done growing the heap. - oldGogc := debug.SetGCPercent(int(myGogc)) - - if logHeapTweaks { - // Check that the new goal looks right - inUse := sample[ALLOCS].Value.Uint64() - sample[FREES].Value.Uint64() - metrics.Read(sample) - newGoal := sample[GOAL].Value.Uint64() - pctOff := 100 * (int64(newGoal) - int64(requestedHeapGoal)) / int64(requestedHeapGoal) - // Check that the new goal is close to requested. 3% of make.bash fails this test. Why, TBD. 
- if pctOff < 2 { - fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %d, count is %d, gogc was %d, is now %d, calcLive %d pctOff %d\n", - goal, count, oldGogc, myGogc, calcLive, pctOff) - } else { - // The GC is being annoying and not giving us the goal that we requested, say more to help understand when/why. - fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %d, count is %d, gogc was %d, is now %d, calcLive %d pctOff %d inUse %d\n", - goal, count, oldGogc, myGogc, calcLive, pctOff, inUse) - } - } - return true - } - } - - // In this case we're done boosting GOGC, set it to 100 and don't set a new finalizer. - oldGogc := debug.SetGCPercent(100) - // inUse helps estimate how late the finalizer ran; at the instant the previous GC ended, - // it was (in theory) equal to the previous GC's heap goal. In a growing heap it is - // expected to grow to the new heap goal. - inUse := sample[ALLOCS].Value.Uint64() - sample[FREES].Value.Uint64() - overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) - if logHeapTweaks { - fmt.Fprintf(os.Stderr, "GCAdjust: Reset GOGC adjust, old goal %d, count is %d, gogc was %d, calcLive %d inUse %d overPct %d\n", - goal, count, oldGogc, calcLive, inUse, overPct) - } - return false - } - - forEachGC(adjustFunc) -} diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index b532bf435e..e32a07d461 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -38,6 +38,7 @@ type DebugFlags struct { GCAdjust int `help:"log adjustments to GOGC" concurrent:"ok"` GCCheck int `help:"check heap/gc use by compiler" concurrent:"ok"` GCProg int `help:"print dump of GC programs"` + GCStart int `help:"specify \"starting\" compiler's heap size in MiB" concurrent:"ok"` Gossahash string `help:"hash value for use in debugging the compiler"` InlFuncsWithClosures int `help:"allow functions with closures to be inlined" 
concurrent:"ok"` InlStaticInit int `help:"allow static initialization of inlined calls" concurrent:"ok"` diff --git a/src/cmd/compile/internal/base/startheap.go b/src/cmd/compile/internal/base/startheap.go new file mode 100644 index 0000000000..1d2713efdb --- /dev/null +++ b/src/cmd/compile/internal/base/startheap.go @@ -0,0 +1,272 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package base + +import ( + "fmt" + "os" + "runtime" + "runtime/debug" + "runtime/metrics" + "sync" +) + +// forEachGC calls fn each GC cycle until it returns false. +func forEachGC(fn func() bool) { + type T [32]byte // large enough to avoid runtime's tiny object allocator + var finalizer func(*T) + finalizer = func(p *T) { + + if fn() { + runtime.SetFinalizer(p, finalizer) + } + } + + finalizer(new(T)) +} + +// AdjustStartingHeap modifies GOGC so that GC should not occur until the heap +// grows to the requested size. This is intended but not promised, though it +// is true-mostly, depending on when the adjustment occurs and on the +// compiler's input and behavior. Once the live heap is approximately half +// this size, GOGC is reset to its value when AdjustStartingHeap was called; +// subsequent GCs may reduce the heap below the requested size, but this +// function does not affect that. +// +// logHeapTweaks (-d=gcadjust=1) enables logging of GOGC adjustment events. +// +// The temporarily requested GOGC is derated from what would be the "obvious" +// value necessary to hit the starting heap goal because the obvious +// (goal/live-1)*100 value seems to grow RSS a little more than it "should" +// (compared to GOMEMLIMIT, e.g.) and the assumption is that the GC's control +// algorithms are tuned for GOGC near 100, and not tuned for huge values of +// GOGC. 
Different derating factors apply for "lo" and "hi" values of GOGC; +// lo is below derateBreak, hi is above derateBreak. The derating factors, +// expressed as integer percentages, are derateLoPct and derateHiPct. +// 60-75 is an okay value for derateLoPct, 30-65 seems like a good value for +// derateHiPct, and 600 seems like a good value for derateBreak. If these +// are zero, defaults are used instead. +// +// NOTE: If you think this code would help startup time in your own +// application and you decide to use it, please benchmark first to see if it +// actually works for you (it may not: the Go compiler is not typical), and +// whatever the outcome, please leave a comment on bug #56546. This code +// uses supported interfaces, but depends more than we like on +// current+observed behavior of the garbage collector, so if many people need +// this feature, we should consider/propose a better way to accomplish it. +func AdjustStartingHeap(requestedHeapGoal, derateBreak, derateLoPct, derateHiPct uint64, logHeapTweaks bool) { + mp := runtime.GOMAXPROCS(0) + + const ( + SHgoal = "/gc/heap/goal:bytes" + SHcount = "/gc/cycles/total:gc-cycles" + SHallocs = "/gc/heap/allocs:bytes" + SHfrees = "/gc/heap/frees:bytes" + ) + + var sample = []metrics.Sample{{Name: SHgoal}, {Name: SHcount}, {Name: SHallocs}, {Name: SHfrees}} + + const ( + SH_GOAL = 0 + SH_COUNT = 1 + SH_ALLOCS = 2 + SH_FREES = 3 + + MB = 1_000_000 + ) + + // These particular magic numbers are designed to make the RSS footprint of -d=gcstart=2000 + // resemble that of GOMEMLIMIT=2000MiB GOGC=10000 when building large projects + // (e.g. the Go compiler itself, and Microsoft's typescript-go AST package), + // with the further restriction that these magic numbers did a good job of reducing user-cpu + // for builds at either gcstart=2000 or gcstart=128. 
+ // + // The benchmarking to obtain this was (a version of): + // + // for i in {1..50} ; do + // for what in std cmd/compile cmd/fix cmd/go github.com/microsoft/typescript-go/internal/ast ; do + // whatbase=`basename ${what}` + // for sh in 128 2000 ; do + // for br in 500 600 ; do + // for shlo in 65 70; do + // for shhi in 55 60 ; do + // benchcmd -n=2 ${whatbase} go build -a \ + // -gcflags=all=-d=gcstart=${sh},gcstartloderate=${shlo},gcstarthiderate=${shhi},gcstartbreak=${br} \ + // ${what} | tee -a startheap${sh}_${br}_${shhi}_${shlo}.bench + // done + // done + // done + // done + // done + // done + // + // benchcmd is "go install github.com/aclements/go-misc/benchcmd@latest" + + if derateBreak == 0 { + derateBreak = 600 + } + if derateLoPct == 0 { + derateLoPct = 70 + } + if derateHiPct == 0 { + derateHiPct = 55 + } + + gogcDerate := func(myGogc uint64) uint64 { + if myGogc < derateBreak { + return (myGogc * derateLoPct) / 100 + } + return (myGogc * derateHiPct) / 100 + } + + // Assumptions and observations of Go's garbage collector, as of Go 1.17-1.20: + + // - the initial heap goal is 4MiB, by fiat. It is possible for Go to start + // with a heap as small as 512k, so this may change in the future. + + // - except for the first heap goal, heap goal is a function of + // observed-live at the previous GC and current GOGC. After the first + // GC, adjusting GOGC immediately updates the goal; before the first GC, + // adjusting GOGC does not modify goal (but the change takes effect after + // the first GC). + + // - the before/after first GC behavior is not guaranteed anywhere, it's + // just behavior, and it's a bad idea to rely on it. + + // - we don't know exactly when GC will run, even after we adjust GOGC; the + // first GC may not have happened yet, may have already happened, or may + // be currently in progress, and GCs can start for several reasons. 
+ + // - forEachGC above will run the provided function at some delay after each + // GC's mark phase terminates; finalizers are run after marking as the + // spans containing finalizable objects are swept, driven by GC + // background activity and allocation demand. + + // - "live at last GC" is not available through the current metrics + // interface. Instead, live is estimated by knowing the adjusted value of + // GOGC and the new heap goal following a GC (this requires knowing that + // at least one GC has occurred): + // estLive = 100 * newGoal / (100 + currentGogc) + // this new value of GOGC + // newGogc = 100*requestedHeapGoal/estLive - 100 + // will result in the desired goal. The logging code checks that the + // resulting goal is correct. + + // There's a small risk that the finalizer will be slow to run after a GC + // that expands the goal to a huge value, and that this will lead to + // out-of-memory. This doesn't seem to happen; in experiments on a variety + // of machines with a variety of extra loads to disrupt scheduling, the + // worst overshoot observed was 50% past requestedHeapGoal. + + metrics.Read(sample) + for _, s := range sample { + if s.Value.Kind() == metrics.KindBad { + // Just return, a slightly slower compilation is a tolerable outcome. + if logHeapTweaks { + fmt.Fprintf(os.Stderr, "GCAdjust: Regret unexpected KindBad for metric %s\n", s.Name) + } + return + } + } + + // Tinker with GOGC to make the heap grow rapidly at first. + currentGoal := sample[SH_GOAL].Value.Uint64() // Believe this will be 4MByte or less, perhaps 512k + myGogc := 100 * requestedHeapGoal / currentGoal + myGogc = gogcDerate(myGogc) + if myGogc <= 125 { + return + } + + if logHeapTweaks { + sample := append([]metrics.Sample(nil), sample...) 
// avoid races with GC callback + AtExit(func() { + metrics.Read(sample) + goal := sample[SH_GOAL].Value.Uint64() + count := sample[SH_COUNT].Value.Uint64() + oldGogc := debug.SetGCPercent(100) + if oldGogc == 100 { + fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d\n", + goal/MB, oldGogc, count, mp) + } else { + inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() + overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) + fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d overPct %d\n", + goal/MB, oldGogc, count, mp, overPct) + + } + }) + } + + originalGOGC := debug.SetGCPercent(int(myGogc)) + + // forEachGC finalizers ought not overlap, but they could run in separate threads. + // This ought not matter, but just in case it bothers the/a race detector, + // use this mutex. + var forEachGCLock sync.Mutex + + adjustFunc := func() bool { + + forEachGCLock.Lock() + defer forEachGCLock.Unlock() + + metrics.Read(sample) + goal := sample[SH_GOAL].Value.Uint64() + count := sample[SH_COUNT].Value.Uint64() + + if goal <= requestedHeapGoal { // Stay the course + if logHeapTweaks { + fmt.Fprintf(os.Stderr, "GCAdjust: Reuse GOGC adjust, current goal %dMB, count is %d, current gogc %d\n", + goal/MB, count, myGogc) + } + return true + } + + // Believe goal has been adjusted upwards, else it would be less-than-or-equal to requestedHeapGoal + calcLive := 100 * goal / (100 + myGogc) + + if 2*calcLive < requestedHeapGoal { // calcLive can exceed requestedHeapGoal! + myGogc = 100*requestedHeapGoal/calcLive - 100 + myGogc = gogcDerate(myGogc) + + if myGogc > 125 { + // Not done growing the heap. 
+ oldGogc := debug.SetGCPercent(int(myGogc)) + + if logHeapTweaks { + // Check that the new goal looks right + inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() + metrics.Read(sample) + newGoal := sample[SH_GOAL].Value.Uint64() + pctOff := 100 * (int64(newGoal) - int64(requestedHeapGoal)) / int64(requestedHeapGoal) + // Check that the new goal is close to requested. 3% of make.bash fails this test. Why, TBD. + if pctOff < 2 { + fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d\n", + goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff) + } else { + // The GC is being annoying and not giving us the goal that we requested, say more to help understand when/why. + fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d inUse %dMB\n", + goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff, inUse/MB) + } + } + return true + } + } + + // In this case we're done boosting GOGC, set it to its original value and don't set a new finalizer. + oldGogc := debug.SetGCPercent(originalGOGC) + // inUse helps estimate how late the finalizer ran; at the instant the previous GC ended, + // it was (in theory) equal to the previous GC's heap goal. In a growing heap it is + // expected to grow to the new heap goal. 
+ if logHeapTweaks { + inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() + overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) + fmt.Fprintf(os.Stderr, "GCAdjust: Reset GOGC adjust, old goal %dMB, count is %d, gogc was %d, gogc is now %d, calcLive %dMB inUse %dMB overPct %d\n", + goal/MB, count, oldGogc, originalGOGC, calcLive/MB, inUse/MB, overPct) + } + return false + } + + forEachGC(adjustFunc) +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 2a3a3f786b..afa16508d1 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -83,10 +83,13 @@ func Main(archInit func(*ssagen.ArchInfo)) { base.DebugSSA = ssa.PhaseOption base.ParseFlags() - if os.Getenv("GOGC") == "" { // GOGC set disables starting heap adjustment - // More processors will use more heap, but assume that more memory is available. - // So 1 processor -> 40MB, 4 -> 64MB, 12 -> 128MB - base.AdjustStartingHeap(uint64(32+8*base.Flag.LowerC) << 20) + if flagGCStart := base.Debug.GCStart; flagGCStart > 0 || // an explicit flag overrides the environment-variable disabling of the GC boost + os.Getenv("GOGC") == "" && os.Getenv("GOMEMLIMIT") == "" && base.Flag.LowerC != 1 { // explicit GC knobs or no concurrency imply the default heap + startHeapMB := int64(128) + if flagGCStart > 0 { + startHeapMB = int64(flagGCStart) + } + base.AdjustStartingHeap(uint64(startHeapMB)<<20, 0, 0, 0, base.Debug.GCAdjust == 1) } types.LocalPkg = types.NewPkg(base.Ctxt.Pkgpath, "") |
