From b4b014465216790e01aa66f9120d03230e4aff46 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 29 Sep 2020 17:01:33 -0700 Subject: runtime: don't always adjust timers Some programs have a lot of timers that they adjust both forward and backward in time. This can cause a large number of timerModifiedEarlier timers. In practice these timers are used for I/O deadlines and are rarely reached. The effect is that the runtime spends a lot of time in adjusttimers making sure that there are no timerModifiedEarlier timers, but the effort is wasted because none of the adjusted timers are near the top of the timer heap anyhow. Avoid much of this extra work by keeping track of the earliest known timerModifiedEarlier timer. This lets us skip adjusttimers if we know that none of the timers will be ready to run anyhow. We will still eventually run it, when we reach the deadline of the earliest known timerModifiedEarlier, although in practice that timer has likely been removed. When we do run adjusttimers, we will reset all of the timerModifiedEarlier timers, and clear our notion of when we need to run adjusttimers again. This effect should be to significantly reduce the number of times we walk through the timer list in adjusttimers. Fixes #41699 Change-Id: I38eb2be611fb34e3017bb33d0a9ed40d75fb414f Reviewed-on: https://go-review.googlesource.com/c/go/+/258303 Trust: Ian Lance Taylor Trust: Emmanuel Odeke Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Michael Knyszek --- src/runtime/time.go | 99 +++++++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 44 deletions(-) (limited to 'src/runtime/time.go') diff --git a/src/runtime/time.go b/src/runtime/time.go index f895bf8443..99290f66d0 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -491,6 +491,8 @@ loop: newStatus = timerModifiedEarlier } + tpp := t.pp.ptr() + // Update the adjustTimers field. Subtract one if we // are removing a timerModifiedEarlier, add one if we // are adding a timerModifiedEarlier. @@ -500,9 +502,10 @@ loop: } if newStatus == timerModifiedEarlier { adjust++ + updateTimerModifiedEarliest(tpp, when) } if adjust != 0 { - atomic.Xadd(&t.pp.ptr().adjustTimers, adjust) + atomic.Xadd(&tpp.adjustTimers, adjust) } // Set the new status of the timer. @@ -637,16 +640,36 @@ func moveTimers(pp *p, timers []*timer) { // the correct place in the heap. While looking for those timers, // it also moves timers that have been modified to run later, // and removes deleted timers. The caller must have locked the timers for pp. -func adjusttimers(pp *p) { - if len(pp.timers) == 0 { - return - } +func adjusttimers(pp *p, now int64) { if atomic.Load(&pp.adjustTimers) == 0 { if verifyTimers { verifyTimerHeap(pp) } + // There are no timers to adjust, so it is safe to clear + // timerModifiedEarliest. Do so in case it is stale. + // Everything will work if we don't do this, + // but clearing here may save future calls to adjusttimers. + atomic.Store64(&pp.timerModifiedEarliest, 0) return } + + // If we haven't yet reached the time of the first timerModifiedEarlier + // timer, don't do anything. This speeds up programs that adjust + // a lot of timers back and forth if the timers rarely expire. + // We'll postpone looking through all the adjusted timers until + // one would actually expire. + if first := atomic.Load64(&pp.timerModifiedEarliest); first != 0 { + if int64(first) > now { + if verifyTimers { + verifyTimerHeap(pp) + } + return + } + + // We are going to clear all timerModifiedEarlier timers. + atomic.Store64(&pp.timerModifiedEarliest, 0) + } + var moved []*timer loop: for i := 0; i < len(pp.timers); i++ { @@ -868,6 +891,10 @@ func runOneTimer(pp *p, t *timer, now int64) { // // The caller must have locked the timers for pp. func clearDeletedTimers(pp *p) { + // We are going to clear all timerModifiedEarlier timers. + // Do this now in case new ones show up while we are looping. + atomic.Store64(&pp.timerModifiedEarliest, 0) + cdel := int32(0) cearlier := int32(0) to := 0 @@ -977,6 +1004,21 @@ func updateTimer0When(pp *p) { } } +// updateTimerModifiedEarliest updates the recorded nextwhen field of the +// earlier timerModifiedEarier value. +// The timers for pp will not be locked. +func updateTimerModifiedEarliest(pp *p, nextwhen int64) { + for { + old := atomic.Load64(&pp.timerModifiedEarliest) + if old != 0 && int64(old) < nextwhen { + return + } + if atomic.Cas64(&pp.timerModifiedEarliest, old, uint64(nextwhen)) { + return + } + } +} + // timeSleepUntil returns the time when the next timer should fire, // and the P that holds the timer heap that that timer is on. // This is only called by sysmon and checkdead. @@ -993,48 +1035,17 @@ func timeSleepUntil() (int64, *p) { continue } - c := atomic.Load(&pp.adjustTimers) - if c == 0 { - w := int64(atomic.Load64(&pp.timer0When)) - if w != 0 && w < next { - next = w - pret = pp - } - continue + w := int64(atomic.Load64(&pp.timer0When)) + if w != 0 && w < next { + next = w + pret = pp } - lock(&pp.timersLock) - for _, t := range pp.timers { - switch s := atomic.Load(&t.status); s { - case timerWaiting: - if t.when < next { - next = t.when - } - case timerModifiedEarlier, timerModifiedLater: - if t.nextwhen < next { - next = t.nextwhen - } - if s == timerModifiedEarlier { - c-- - } - } - // The timers are sorted, so we only have to check - // the first timer for each P, unless there are - // some timerModifiedEarlier timers. The number - // of timerModifiedEarlier timers is in the adjustTimers - // field, used to initialize c, above. - // - // We don't worry about cases like timerModifying. - // New timers can show up at any time, - // so this function is necessarily imprecise. - // Do a signed check here since we aren't - // synchronizing the read of pp.adjustTimers - // with the check of a timer status. - if int32(c) <= 0 { - break - } + w = int64(atomic.Load64(&pp.timerModifiedEarliest)) + if w != 0 && w < next { + next = w + pret = pp } - unlock(&pp.timersLock) } unlock(&allpLock) -- cgit v1.3 From e02ab89eb8994fa6f2dfa2924cdadb097633fcc1 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 29 Oct 2020 16:23:27 -0700 Subject: runtime: simplify nobarrierWakeTime Also use the simplified nobarrierWakeTime in findrunnable, as it no longer needs the current time. Change-Id: I77b125d6a184dde0aeb517fc068164c274f0a046 Reviewed-on: https://go-review.googlesource.com/c/go/+/266304 Trust: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Go Bot Reviewed-by: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/proc.go | 15 +++------------ src/runtime/time.go | 13 ++++++------- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'src/runtime/time.go') diff --git a/src/runtime/proc.go b/src/runtime/proc.go index c97f4820da..939757f3a7 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -2659,18 +2659,9 @@ stop: // checkTimers here because it calls adjusttimers which may need to allocate // memory, and that isn't allowed when we don't have an active P. for _, _p_ := range allpSnapshot { - // This is similar to nobarrierWakeTime, but minimizes calls to - // nanotime. - if atomic.Load(&_p_.adjustTimers) > 0 { - if now == 0 { - now = nanotime() - } - pollUntil = now - } else { - w := int64(atomic.Load64(&_p_.timer0When)) - if w != 0 && (pollUntil == 0 || w < pollUntil) { - pollUntil = w - } + w := nobarrierWakeTime(_p_) + if w != 0 && (pollUntil == 0 || w < pollUntil) { + pollUntil = w } } if pollUntil != 0 { diff --git a/src/runtime/time.go b/src/runtime/time.go index 99290f66d0..75b66f8492 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -742,16 +742,15 @@ func addAdjustedTimers(pp *p, moved []*timer) { // nobarrierWakeTime looks at P's timers and returns the time when we // should wake up the netpoller. It returns 0 if there are no timers. // This function is invoked when dropping a P, and must run without -// any write barriers. Therefore, if there are any timers that needs -// to be moved earlier, it conservatively returns the current time. -// The netpoller M will wake up and adjust timers before sleeping again. +// any write barriers. //go:nowritebarrierrec func nobarrierWakeTime(pp *p) int64 { - if atomic.Load(&pp.adjustTimers) > 0 { - return nanotime() - } else { - return int64(atomic.Load64(&pp.timer0When)) + next := int64(atomic.Load64(&pp.timer0When)) + nextAdj := int64(atomic.Load64(&pp.timerModifiedEarliest)) + if next == 0 || (nextAdj != 0 && nextAdj < next) { + next = nextAdj } + return next } // runtimer examines the first timer in timers. If it is ready based on now, -- cgit v1.3 From b635e4b808bf45ebd66e9f687e18b9af6bd634c1 Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Tue, 1 Dec 2020 17:24:33 -0500 Subject: time, runtime: don't set timer when = 0 timer when == 0, in the context of timer0When and timerModifiedEarliest, is a sentinel value meaning there are no timers on the heap. TestCheckRuntimeTimerOverflow reaching into the runtime to set a timer to when = 0 when it is otherwise not possible breaks this invariant. After golang.org/cl/258303, we will no longer detect and run this timer, thus blocking any other timers lower on the heap from running. This manifests as random timers failing to fire in other tests. The need to set this overflowed timer to when = 0 is gone with the old timer proc implementation, so we can simply remove it. Fixes #42424 Change-Id: Iea32100136ad8ec1bedfa77b1e7d9ed868812838 Reviewed-on: https://go-review.googlesource.com/c/go/+/274632 Reviewed-by: Ian Lance Taylor Run-TryBot: Michael Pratt TryBot-Result: Go Bot Trust: Michael Pratt --- src/runtime/time.go | 2 ++ src/time/internal_test.go | 10 ---------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'src/runtime/time.go') diff --git a/src/runtime/time.go b/src/runtime/time.go index 75b66f8492..83d93c5686 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -23,6 +23,8 @@ type timer struct { // Timer wakes up at when, and then at when+period, ... (period > 0 only) // each time calling f(arg, now) in the timer goroutine, so f must be // a well-behaved function and not block. + // + // when must be positive on an active timer. when int64 period int64 f func(interface{}, uintptr) diff --git a/src/time/internal_test.go b/src/time/internal_test.go index 35ce69b228..e70b6f34de 100644 --- a/src/time/internal_test.go +++ b/src/time/internal_test.go @@ -53,18 +53,8 @@ func CheckRuntimeTimerOverflow() { t := NewTimer(1) defer func() { - // Subsequent tests won't work correctly if we don't stop the - // overflow timer and kick the timer proc back into service. - // - // The timer proc is now sleeping and can only be awoken by - // adding a timer to the *beginning* of the heap. We can't - // wake it up by calling NewTimer since other tests may have - // left timers running that should have expired before ours. - // Instead we zero the overflow timer duration and start it - // once more. stopTimer(r) t.Stop() - resetTimer(r, 0) }() // If the test fails, we will hang here until the timeout in the -- cgit v1.3 From b78b427be5e4c8a51a2b01b39c1ce6c4f39a93dc Mon Sep 17 00:00:00 2001 From: Michael Pratt Date: Wed, 2 Dec 2020 12:19:13 -0500 Subject: runtime, time: strictly enforce when, period constraints timer.when must always be positive. addtimer and modtimer already check that it is non-negative; we expand it to include zero. Also upgrade from pinning bad values to throwing, as these values shouldn't be possible to pass (except as below). timeSleep may overflow timer.nextwhen. This would previously have been pinned by resetForSleep, now we fix it manually. runOneTimer may overflow timer.when when adding timer.period. Detect this and pin to maxWhen. addtimer is now too strict to allow TestOverflowRuntimeTimer to test an overflowed timer. Such a timer should not be possible; to help guard against accidental inclusion siftup / siftdown will check timers as it goes. This has been replaced with tests for period and sleep overflows. Change-Id: I17f9739e27ebcb20d87945c635050316fb8e9226 Reviewed-on: https://go-review.googlesource.com/c/go/+/274853 Trust: Michael Pratt Reviewed-by: Michael Knyszek Reviewed-by: Ian Lance Taylor --- src/runtime/time.go | 31 +++++++++++++++++++++++++------ src/time/internal_test.go | 42 +++++++++++++++++------------------------- src/time/sleep.go | 2 ++ src/time/sleep_test.go | 23 ++++++++++++++++------- 4 files changed, 60 insertions(+), 38 deletions(-) (limited to 'src/runtime/time.go') diff --git a/src/runtime/time.go b/src/runtime/time.go index 83d93c5686..d338705b7c 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -187,6 +187,9 @@ func timeSleep(ns int64) { t.f = goroutineReady t.arg = gp t.nextwhen = nanotime() + ns + if t.nextwhen < 0 { // check for overflow. + t.nextwhen = maxWhen + } gopark(resetForSleep, unsafe.Pointer(t), waitReasonSleep, traceEvGoSleep, 1) } @@ -244,10 +247,14 @@ func goroutineReady(arg interface{}, seq uintptr) { // That avoids the risk of changing the when field of a timer in some P's heap, // which could cause the heap to become unsorted. func addtimer(t *timer) { - // when must never be negative; otherwise runtimer will overflow - // during its delta calculation and never expire other runtime timers. - if t.when < 0 { - t.when = maxWhen + // when must be positive. A negative value will cause runtimer to + // overflow during its delta calculation and never expire other runtime + // timers. Zero will cause checkTimers to fail to notice the timer. + if t.when <= 0 { + throw("timer when must be positive") + } + if t.period < 0 { + throw("timer period must be non-negative") } if t.status != timerNoStatus { throw("addtimer called with initialized timer") @@ -408,8 +415,11 @@ func dodeltimer0(pp *p) { // This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset. // Reports whether the timer was modified before it was run. func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool { - if when < 0 { - when = maxWhen + if when <= 0 { + throw("timer when must be positive") + } + if period < 0 { + throw("timer period must be non-negative") } status := uint32(timerNoStatus) @@ -848,6 +858,9 @@ func runOneTimer(pp *p, t *timer, now int64) { // Leave in heap but adjust next time to fire. delta := t.when - now t.when += t.period * (1 + -delta/t.period) + if t.when < 0 { // check for overflow. + t.when = maxWhen + } siftdownTimer(pp.timers, 0) if !atomic.Cas(&t.status, timerRunning, timerWaiting) { badTimer() @@ -1066,6 +1079,9 @@ func siftupTimer(t []*timer, i int) { badTimer() } when := t[i].when + if when <= 0 { + badTimer() + } tmp := t[i] for i > 0 { p := (i - 1) / 4 // parent @@ -1086,6 +1102,9 @@ func siftdownTimer(t []*timer, i int) { badTimer() } when := t[i].when + if when <= 0 { + badTimer() + } tmp := t[i] for { c := i*4 + 1 // left child diff --git a/src/time/internal_test.go b/src/time/internal_test.go index e70b6f34de..ffe54e47c2 100644 --- a/src/time/internal_test.go +++ b/src/time/internal_test.go @@ -33,38 +33,30 @@ var DaysIn = daysIn func empty(arg interface{}, seq uintptr) {} -// Test that a runtimeTimer with a duration so large it overflows -// does not cause other timers to hang. +// Test that a runtimeTimer with a period that would overflow when on +// expiration does not throw or cause other timers to hang. // // This test has to be in internal_test.go since it fiddles with // unexported data structures. -func CheckRuntimeTimerOverflow() { - // We manually create a runtimeTimer to bypass the overflow - // detection logic in NewTimer: we're testing the underlying - // runtime.addtimer function. +func CheckRuntimeTimerPeriodOverflow() { + // We manually create a runtimeTimer with huge period, but that expires + // immediately. The public Timer interface would require waiting for + // the entire period before the first update. r := &runtimeTimer{ - when: runtimeNano() + (1<<63 - 1), - f: empty, - arg: nil, + when: runtimeNano(), + period: 1<<63 - 1, + f: empty, + arg: nil, } startTimer(r) + defer stopTimer(r) - // Start a goroutine that should send on t.C right away. - t := NewTimer(1) - - defer func() { - stopTimer(r) - t.Stop() - }() - - // If the test fails, we will hang here until the timeout in the - // testing package fires, which is 10 minutes. It would be nice to - // catch the problem sooner, but there is no reliable way to guarantee - // that timers are run without doing something involving the scheduler. - // Previous failed attempts have tried calling runtime.Gosched and - // runtime.GC, but neither is reliable. So we fall back to hope: - // We hope we don't hang here. - <-t.C + // If this test fails, we will either throw (when siftdownTimer detects + // bad when on update), or other timers will hang (if the timer in a + // heap is in a bad state). There is no reliable way to test this, but + // we wait on a short timer here as a smoke test (alternatively, timers + // in later tests may hang). + <-After(25 * Millisecond) } var ( diff --git a/src/time/sleep.go b/src/time/sleep.go index 22ffd68282..90d8a18a68 100644 --- a/src/time/sleep.go +++ b/src/time/sleep.go @@ -31,6 +31,8 @@ func when(d Duration) int64 { } t := runtimeNano() + int64(d) if t < 0 { + // N.B. runtimeNano() and d are always positive, so addition + // (including overflow) will never result in t == 0. t = 1<<63 - 1 // math.MaxInt64 } return t diff --git a/src/time/sleep_test.go b/src/time/sleep_test.go index ba0016bf49..084ac33f51 100644 --- a/src/time/sleep_test.go +++ b/src/time/sleep_test.go @@ -434,17 +434,29 @@ func TestReset(t *testing.T) { t.Error(err) } -// Test that sleeping for an interval so large it overflows does not -// result in a short sleep duration. +// Test that sleeping (via Sleep or Timer) for an interval so large it +// overflows does not result in a short sleep duration. Nor does it interfere +// with execution of other timers. If it does, timers in this or subsequent +// tests may not fire. func TestOverflowSleep(t *testing.T) { const big = Duration(int64(1<<63 - 1)) + + go func() { + Sleep(big) + // On failure, this may return after the test has completed, so + // we need to panic instead. + panic("big sleep returned") + }() + select { case <-After(big): t.Fatalf("big timeout fired") case <-After(25 * Millisecond): // OK } + const neg = Duration(-1 << 63) + Sleep(neg) // Returns immediately. select { case <-After(neg): // OK @@ -473,13 +485,10 @@ func TestIssue5745(t *testing.T) { t.Error("Should be unreachable.") } -func TestOverflowRuntimeTimer(t *testing.T) { - if testing.Short() { - t.Skip("skipping in short mode, see issue 6874") - } +func TestOverflowPeriodRuntimeTimer(t *testing.T) { // This may hang forever if timers are broken. See comment near // the end of CheckRuntimeTimerOverflow in internal_test.go. - CheckRuntimeTimerOverflow() + CheckRuntimeTimerPeriodOverflow() } func checkZeroPanicString(t *testing.T) { -- cgit v1.3