time: garbage collect unstopped Tickers and Timers

From the beginning of Go, the time package has had a gotcha: if you use a select on <-time.After(1*time.Minute), even if the select finishes immediately because some other case is ready, the underlying timer from time.After keeps running until the minute is over. This pins the timer in the timer heap, which keeps it from being garbage collected and in extreme cases also slows down timer operations. The lack of garbage collection is the more important problem. The docs for After warn against this scenario and suggest using NewTimer with a call to Stop after the select instead, purely to work around this garbage collection problem. Oddly, the docs for NewTimer and NewTicker do not mention this problem, but they have the same issue: they cannot be collected until either they are Stopped or, in the case of Timer, the timer expires. (Tickers repeat, so they never expire.) People have built up a shared knowledge that timers and tickers need to defer t.Stop even though the docs do not mention this (it is somewhat implied by the After docs). This CL fixes the garbage collection problem, so that a timer that is unreferenced can be GC'ed immediately, even if it is still running. The approach is to only insert the timer into the heap when some channel operation is blocked on it; the last channel operation to stop using the timer takes it back out of the heap. When a timer's channel is no longer referenced, there are no channel operations blocked on it, so it's not in the heap, so it can be GC'ed immediately. This CL adds an undocumented GODEBUG asynctimerchan=1 that will disable the change. The documentation happens in the CL 568341. Fixes #8898. Fixes #61542. Change-Id: Ieb303b6de1fb3527d3256135151a9e983f3c27e6 Reviewed-on: https://go-review.googlesource.com/c/go/+/512355 Reviewed-by: Austin Clements <austin@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: Russ Cox <rsc@golang.org>
author: Russ Cox <rsc@golang.org> 2024-02-14 20:36:47 -0500
committer: Russ Cox <rsc@golang.org> 2024-03-13 21:36:04 +0000
commit: 508bb17edd04479622fad263cd702deac1c49157 (patch)
tree: e30588bcd75e92b7d3ecb59645e37a9356afa25f /src/time
parent: 74a0e3160d969fac27a65cd79a76214f6d1abbf5 (diff)
download: go-508bb17edd04479622fad263cd702deac1c49157.tar.xz
5 files changed, 455 insertions, 19 deletions
diff --git a/src/time/internal_test.go b/src/time/internal_test.go
index 42ebd4d42c..619f605ae7 100644
--- a/src/time/internal_test.go
+++ b/src/time/internal_test.go
@@ -36,7 +36,7 @@ func disablePlatformSources() (undo func()) {
 var Interrupt = interrupt
 var DaysIn = daysIn
 
-func empty(arg any, seq uintptr) {}
+func empty(arg any, seq uintptr, delta int64) {}
 
 // Test that a runtimeTimer with a period that would overflow when on
 // expiration does not throw or cause other timers to hang.
@@ -47,7 +47,7 @@ func CheckRuntimeTimerPeriodOverflow() {
 	// We manually create a runtimeTimer with huge period, but that expires
 	// immediately. The public Timer interface would require waiting for
 	// the entire period before the first update.
-	t := (*Timer)(newTimer(runtimeNano(), 1<<63-1, empty, nil))
+	t := (*Timer)(newTimer(runtimeNano(), 1<<63-1, empty, nil, nil))
 	defer t.Stop()
 
 	// If this test fails, we will either throw (when siftdownTimer detects
diff --git a/src/time/sleep.go b/src/time/sleep.go
index 0176c3003e..e6225dfb35 100644
--- a/src/time/sleep.go
+++ b/src/time/sleep.go
@@ -4,12 +4,32 @@
 
 package time
 
-import _ "unsafe" // for go:linkname
+import (
+	"internal/godebug"
+	"unsafe"
+)
 
 // Sleep pauses the current goroutine for at least the duration d.
 // A negative or zero duration causes Sleep to return immediately.
 func Sleep(d Duration)
 
+var asynctimerchan = godebug.New("asynctimerchan")
+
+// syncTimer returns c as an unsafe.Pointer, for passing to newTimer.
+// If the GODEBUG asynctimerchan has disabled the async timer chan
+// code, then syncTimer always returns nil, to disable the special
+// channel code paths in the runtime.
+func syncTimer(c chan Time) unsafe.Pointer {
+	// If asynctimerchan=1, we don't even tell the runtime
+	// about channel timers, so that we get the pre-Go 1.23 code paths.
+	if asynctimerchan.Value() == "1" {
+		return nil
+	}
+
+	// Otherwise pass to runtime.
+	return *(*unsafe.Pointer)(unsafe.Pointer(&c))
+}
+
 // when is a helper function for setting the 'when' field of a runtimeTimer.
 // It returns what the time will be, in nanoseconds, Duration d in the future.
 // If d is negative, it is ignored. If the returned value would be less than
@@ -29,8 +49,12 @@ func when(d Duration) int64 {
 
 // These functions are pushed to package time from package runtime.
 
+// The arg cp is a chan Time, but the declaration in runtime uses a pointer,
+// so we use a pointer here too. This keeps some tools that aggressively
+// compare linknamed symbol definitions happier.
+//
 //go:linkname newTimer
-func newTimer(when, period int64, f func(any, uintptr), arg any) *Timer
+func newTimer(when, period int64, f func(any, uintptr, int64), arg any, cp unsafe.Pointer) *Timer
 
 //go:linkname stopTimer
 func stopTimer(*Timer) bool
@@ -83,9 +107,18 @@ func (t *Timer) Stop() bool {
 
 // NewTimer creates a new Timer that will send
 // the current time on its channel after at least duration d.
+//
+// Before Go 1.23, the garbage collector did not recover
+// timers that had not yet expired or been stopped, so code often
+// immediately deferred t.Stop after calling NewTimer, to make
+// the timer recoverable when it was no longer needed.
+// As of Go 1.23, the garbage collector can recover unreferenced
+// timers, even if they haven't expired or been stopped.
+// The Stop method is no longer necessary to help the garbage collector.
+// (Code may of course still want to call Stop to stop the timer for other reasons.)
 func NewTimer(d Duration) *Timer {
 	c := make(chan Time, 1)
-	t := (*Timer)(newTimer(when(d), 0, sendTime, c))
+	t := (*Timer)(newTimer(when(d), 0, sendTime, c, syncTimer(c)))
 	t.C = c
 	return t
 }
@@ -133,9 +166,14 @@ func (t *Timer) Reset(d Duration) bool {
 }
 
 // sendTime does a non-blocking send of the current time on c.
-func sendTime(c any, seq uintptr) {
+func sendTime(c any, seq uintptr, delta int64) {
+	// delta is how long ago the channel send was supposed to happen.
+	// The current time can be arbitrarily far into the future, because the runtime
+	// can delay a sendTime call until a goroutines tries to receive from
+	// the channel. Subtract delta to go back to the old time that we
+	// used to send.
 	select {
-	case c.(chan Time) <- Now():
+	case c.(chan Time) <- Now().Add(Duration(-delta)):
 	default:
 	}
 }
@@ -143,9 +181,13 @@ func sendTime(c any, seq uintptr) {
 // After waits for the duration to elapse and then sends the current time
 // on the returned channel.
 // It is equivalent to NewTimer(d).C.
-// The underlying Timer is not recovered by the garbage collector
-// until the timer fires. If efficiency is a concern, use NewTimer
-// instead and call Timer.Stop if the timer is no longer needed.
+//
+// Before Go 1.23, this documentation warned that the underlying
+// Timer would not be recovered by the garbage collector until the
+// timer fired, and that if efficiency was a concern, code should use
+// NewTimer instead and call Timer.Stop if the timer is no longer needed.
+// As of Go 1.23, the garbage collector can recover unreferenced,
+// unstopped timers. There is no reason to prefer NewTimer when After will do.
 func After(d Duration) <-chan Time {
 	return NewTimer(d).C
 }
@@ -155,9 +197,9 @@ func After(d Duration) <-chan Time {
 // be used to cancel the call using its Stop method.
 // The returned Timer's C field is not used and will be nil.
 func AfterFunc(d Duration, f func()) *Timer {
-	return (*Timer)(newTimer(when(d), 0, goFunc, f))
+	return (*Timer)(newTimer(when(d), 0, goFunc, f, nil))
 }
 
-func goFunc(arg any, seq uintptr) {
+func goFunc(arg any, seq uintptr, delta int64) {
 	go arg.(func())()
 }
diff --git a/src/time/sleep_test.go b/src/time/sleep_test.go
index b50f9cd5a5..565af16d4d 100644
--- a/src/time/sleep_test.go
+++ b/src/time/sleep_test.go
@@ -90,7 +90,7 @@ func TestAfterFunc(t *testing.T) {
 	<-c
 }
 
-func TestAfterStress(t *testing.T) {
+func TestTickerStress(t *testing.T) {
 	var stop atomic.Bool
 	go func() {
 		for !stop.Load() {
@@ -109,6 +109,33 @@ func TestAfterStress(t *testing.T) {
 	stop.Store(true)
 }
 
+func TestTickerConcurrentStress(t *testing.T) {
+	var stop atomic.Bool
+	go func() {
+		for !stop.Load() {
+			runtime.GC()
+			// Yield so that the OS can wake up the timer thread,
+			// so that it can generate channel sends for the main goroutine,
+			// which will eventually set stop = 1 for us.
+			Sleep(Nanosecond)
+		}
+	}()
+	ticker := NewTicker(1)
+	var wg sync.WaitGroup
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for i := 0; i < 100; i++ {
+				<-ticker.C
+			}
+		}()
+	}
+	wg.Wait()
+	ticker.Stop()
+	stop.Store(true)
+}
+
 func TestAfterFuncStarvation(t *testing.T) {
 	// Start two goroutines ping-ponging on a channel send.
 	// At any given time, at least one of these goroutines is runnable:
@@ -304,6 +331,7 @@ func TestAfter(t *testing.T) {
 }
 
 func TestAfterTick(t *testing.T) {
+	t.Parallel()
 	const Count = 10
 	Delta := 100 * Millisecond
 	if testing.Short() {
@@ -461,6 +489,7 @@ func TestTimerStopStress(t *testing.T) {
 	if testing.Short() {
 		return
 	}
+	t.Parallel()
 	for i := 0; i < 100; i++ {
 		go func(i int) {
 			timer := AfterFunc(2*Second, func() {
diff --git a/src/time/tick.go b/src/time/tick.go
index e0bcd16a0d..935b61a8ee 100644
--- a/src/time/tick.go
+++ b/src/time/tick.go
@@ -23,7 +23,16 @@ type Ticker struct {
 // ticks is specified by the duration argument. The ticker will adjust
 // the time interval or drop ticks to make up for slow receivers.
 // The duration d must be greater than zero; if not, NewTicker will
-// panic. Stop the ticker to release associated resources.
+// panic.
+//
+// Before Go 1.23, the garbage collector did not recover
+// tickers that had not yet expired or been stopped, so code often
+// immediately deferred t.Stop after calling NewTicker, to make
+// the ticker recoverable when it was no longer needed.
+// As of Go 1.23, the garbage collector can recover unreferenced
+// tickers, even if they haven't been stopped.
+// The Stop method is no longer necessary to help the garbage collector.
+// (Code may of course still want to call Stop to stop the ticker for other reasons.)
 func NewTicker(d Duration) *Ticker {
 	if d <= 0 {
 		panic("non-positive interval for NewTicker")
@@ -32,7 +41,7 @@ func NewTicker(d Duration) *Ticker {
 	// If the client falls behind while reading, we drop ticks
 	// on the floor until the client catches up.
 	c := make(chan Time, 1)
-	t := (*Ticker)(unsafe.Pointer(newTimer(when(d), int64(d), sendTime, c)))
+	t := (*Ticker)(unsafe.Pointer(newTimer(when(d), int64(d), sendTime, c, syncTimer(c))))
 	t.C = c
 	return t
 }
@@ -64,10 +73,16 @@ func (t *Ticker) Reset(d Duration) {
 }
 
 // Tick is a convenience wrapper for NewTicker providing access to the ticking
-// channel only. While Tick is useful for clients that have no need to shut down
-// the Ticker, be aware that without a way to shut it down the underlying
-// Ticker cannot be recovered by the garbage collector; it "leaks".
-// Unlike NewTicker, Tick will return nil if d <= 0.
+// channel only. Unlike NewTicker, Tick will return nil if d <= 0.
+//
+// Before Go 1.23, this documentation warned that the underlying
+// Ticker would never be recovered by the garbage collector, and that
+// if efficiency was a concern, code should use NewTicker instead and
+// call Ticker.Stop when the ticker is no longer needed.
+// As of Go 1.23, the garbage collector can recover unreferenced
+// tickers, even if they haven't been stopped.
+// The Stop method is no longer necessary to help the garbage collector.
+// There is no longer any reason to prefer NewTicker when Tick will do.
 func Tick(d Duration) <-chan Time {
 	if d <= 0 {
 		return nil
diff --git a/src/time/tick_test.go b/src/time/tick_test.go
index 0ba0c36172..46268acfe3 100644
--- a/src/time/tick_test.go
+++ b/src/time/tick_test.go
@@ -13,6 +13,8 @@ import (
 )
 
 func TestTicker(t *testing.T) {
+	t.Parallel()
+
 	// We want to test that a ticker takes as much time as expected.
 	// Since we don't want the test to run for too long, we don't
 	// want to use lengthy times. This makes the test inherently flaky.
@@ -106,6 +108,8 @@ func TestTickerStopWithDirectInitialization(t *testing.T) {
 
 // Test that a bug tearing down a ticker has been fixed. This routine should not deadlock.
 func TestTeardown(t *testing.T) {
+	t.Parallel()
+
 	Delta := 100 * Millisecond
 	if testing.Short() {
 		Delta = 20 * Millisecond
@@ -256,3 +260,349 @@ func BenchmarkTickerResetNaive(b *testing.B) {
 		ticker.Stop()
 	})
 }
+
+func TestTimerGC(t *testing.T) {
+	run := func(t *testing.T, what string, f func()) {
+		t.Helper()
+		t.Run(what, func(t *testing.T) {
+			t.Helper()
+			const N = 1e4
+			var stats runtime.MemStats
+			runtime.GC()
+			runtime.GC()
+			runtime.GC()
+			runtime.ReadMemStats(&stats)
+			before := int64(stats.Mallocs - stats.Frees)
+
+			for j := 0; j < N; j++ {
+				f()
+			}
+
+			runtime.GC()
+			runtime.GC()
+			runtime.GC()
+			runtime.ReadMemStats(&stats)
+			after := int64(stats.Mallocs - stats.Frees)
+
+			// Allow some slack, but inuse >= N means at least 1 allocation per iteration.
+			inuse := after - before
+			if inuse >= N {
+				t.Errorf("%s did not get GC'ed: %d allocations", what, inuse)
+
+				Sleep(1 * Second)
+				runtime.ReadMemStats(&stats)
+				after := int64(stats.Mallocs - stats.Frees)
+				inuse = after - before
+				t.Errorf("after a sleep: %d allocations", inuse)
+			}
+		})
+	}
+
+	run(t, "After", func() { After(Hour) })
+	run(t, "Tick", func() { Tick(Hour) })
+	run(t, "NewTimer", func() { NewTimer(Hour) })
+	run(t, "NewTicker", func() { NewTicker(Hour) })
+	run(t, "NewTimerStop", func() { NewTimer(Hour).Stop() })
+	run(t, "NewTickerStop", func() { NewTicker(Hour).Stop() })
+}
+
+func TestTimerChan(t *testing.T) {
+	t.Parallel()
+	tick := &timer2{NewTimer(10000 * Second)}
+	testTimerChan(t, tick, tick.C)
+}
+
+func TestTickerChan(t *testing.T) {
+	t.Parallel()
+	tick := NewTicker(10000 * Second)
+	testTimerChan(t, tick, tick.C)
+}
+
+// timer2 is a Timer with Reset and Stop methods with no result,
+// to have the same signatures as Ticker.
+type timer2 struct {
+	*Timer
+}
+
+func (t *timer2) Stop() {
+	t.Timer.Stop()
+}
+
+func (t *timer2) Reset(d Duration) {
+	t.Timer.Reset(d)
+}
+
+type ticker interface {
+	Stop()
+	Reset(Duration)
+}
+
+func testTimerChan(t *testing.T, tick ticker, C <-chan Time) {
+	// Retry parameters. Enough to deflake even on slow machines.
+	// Windows in particular has very coarse timers so we have to
+	// wait 10ms just to make a timer go off.
+	const (
+		sched = 10 * Millisecond
+		tries = 10
+	)
+
+	drain := func() {
+		select {
+		case <-C:
+		default:
+		}
+	}
+	noTick := func() {
+		t.Helper()
+		select {
+		default:
+		case <-C:
+			t.Fatalf("extra tick")
+		}
+	}
+	assertTick := func() {
+		t.Helper()
+		select {
+		default:
+		case <-C:
+			return
+		}
+		for i := 0; i < tries; i++ {
+			Sleep(sched)
+			select {
+			default:
+			case <-C:
+				return
+			}
+		}
+		t.Fatalf("missing tick")
+	}
+	assertLen := func() {
+		t.Helper()
+		var n int
+		if n = len(C); n == 1 {
+			return
+		}
+		for i := 0; i < tries; i++ {
+			Sleep(sched)
+			if n = len(C); n == 1 {
+				return
+			}
+		}
+		t.Fatalf("len(C)  = %d, want 1", n)
+	}
+
+	// Test simple stop; timer never in heap.
+	tick.Stop()
+	noTick()
+
+	// Test modify of timer not in heap.
+	tick.Reset(10000 * Second)
+	noTick()
+
+	// Test modify of timer in heap.
+	tick.Reset(1)
+	assertTick()
+
+	// Sleep long enough that a second tick must happen if this is a ticker.
+	// Test that Reset does not lose the tick that should have happened.
+	Sleep(sched)
+	tick.Reset(10000 * Second)
+	_, isTicker := tick.(*Ticker)
+	if isTicker {
+		assertTick()
+	}
+	noTick()
+
+	// Test that len sees an immediate tick arrive
+	// for Reset of timer in heap.
+	tick.Reset(1)
+	assertLen()
+	assertTick()
+
+	// Test that len sees an immediate tick arrive
+	// for Reset of timer NOT in heap.
+	tick.Stop()
+	drain()
+	tick.Reset(1)
+	assertLen()
+	assertTick()
+
+	// Sleep long enough that a second tick must happen if this is a ticker.
+	// Test that Reset does not lose the tick that should have happened.
+	Sleep(sched)
+	tick.Reset(10000 * Second)
+	if isTicker {
+		assertLen()
+		assertTick()
+	}
+	noTick()
+
+	notDone := func(done chan bool) {
+		t.Helper()
+		select {
+		default:
+		case <-done:
+			t.Fatalf("early done")
+		}
+	}
+
+	waitDone := func(done chan bool) {
+		t.Helper()
+		for i := 0; i < tries; i++ {
+			Sleep(sched)
+			select {
+			case <-done:
+				return
+			default:
+			}
+		}
+		t.Fatalf("never got done")
+	}
+
+	// Reset timer in heap (already reset above, but just in case).
+	tick.Reset(10000 * Second)
+	drain()
+
+	// Test stop while timer in heap (because goroutine is blocked on <-C).
+	done := make(chan bool)
+	notDone(done)
+	go func() {
+		<-C
+		close(done)
+	}()
+	Sleep(sched)
+	notDone(done)
+
+	// Test reset far away while timer in heap.
+	tick.Reset(20000 * Second)
+	Sleep(sched)
+	notDone(done)
+
+	// Test imminent reset while in heap.
+	tick.Reset(1)
+	waitDone(done)
+
+	// If this is a ticker, another tick should have come in already
+	// (they are 1ns apart). If a timer, it should have stopped.
+	if isTicker {
+		assertTick()
+	} else {
+		noTick()
+	}
+
+	tick.Stop()
+	if isTicker {
+		drain()
+	}
+	noTick()
+
+	// Again using select and with two goroutines waiting.
+	tick.Reset(10000 * Second)
+	done = make(chan bool, 2)
+	done1 := make(chan bool)
+	done2 := make(chan bool)
+	stop := make(chan bool)
+	go func() {
+		select {
+		case <-C:
+			done <- true
+		case <-stop:
+		}
+		close(done1)
+	}()
+	go func() {
+		select {
+		case <-C:
+			done <- true
+		case <-stop:
+		}
+		close(done2)
+	}()
+	Sleep(sched)
+	notDone(done)
+	tick.Reset(sched / 2)
+	Sleep(sched)
+	waitDone(done)
+	tick.Stop()
+	close(stop)
+	waitDone(done1)
+	waitDone(done2)
+	if isTicker {
+		// extra send might have sent done again
+		// (handled by buffering done above).
+		select {
+		default:
+		case <-done:
+		}
+		// extra send after that might have filled C.
+		select {
+		default:
+		case <-C:
+		}
+	}
+	notDone(done)
+
+	// Test enqueueTimerChan when timer is stopped.
+	stop = make(chan bool)
+	done = make(chan bool, 2)
+	for i := 0; i < 2; i++ {
+		go func() {
+			select {
+			case <-C:
+				panic("unexpected data")
+			case <-stop:
+			}
+			done <- true
+		}()
+	}
+	Sleep(sched)
+	close(stop)
+	waitDone(done)
+	waitDone(done)
+}
+
+func TestManualTicker(t *testing.T) {
+	// Code should not do this, but some old code dating to Go 1.9 does.
+	// Make sure this doesn't crash.
+	// See go.dev/issue/21874.
+	c := make(chan Time)
+	tick := &Ticker{C: c}
+	tick.Stop()
+}
+
+func TestAfterTimes(t *testing.T) {
+	t.Parallel()
+	// Using After(10ms) but waiting for 500ms to read the channel
+	// should produce a time from start+10ms, not start+500ms.
+	// Make sure it does.
+	// To avoid flakes due to very long scheduling delays,
+	// require 10 failures in a row before deciding something is wrong.
+	for i := 0; i < 10; i++ {
+		start := Now()
+		c := After(10 * Millisecond)
+		Sleep(500 * Millisecond)
+		dt := (<-c).Sub(start)
+		if dt < 400*Millisecond {
+			return
+		}
+		t.Logf("After(10ms) time is +%v, want <400ms", dt)
+	}
+	t.Errorf("not working")
+}
+
+func TestTickTimes(t *testing.T) {
+	t.Parallel()
+	// See comment in TestAfterTimes
+	for i := 0; i < 10; i++ {
+		start := Now()
+		c := Tick(10 * Millisecond)
+		Sleep(500 * Millisecond)
+		dt := (<-c).Sub(start)
+		if dt < 400*Millisecond {
+			return
+		}
+		t.Logf("Tick(10ms) time is +%v, want <400ms", dt)
+	}
+	t.Errorf("not working")
+}
author	Russ Cox <rsc@golang.org>	2024-02-14 20:36:47 -0500
committer	Russ Cox <rsc@golang.org>	2024-03-13 21:36:04 +0000
commit	508bb17edd04479622fad263cd702deac1c49157 (patch)
tree	e30588bcd75e92b7d3ecb59645e37a9356afa25f /src/time
parent	74a0e3160d969fac27a65cd79a76214f6d1abbf5 (diff)
download	go-508bb17edd04479622fad263cd702deac1c49157.tar.xz