From 8fdc79e18a9704185bd6471b592db1e8004bd993 Mon Sep 17 00:00:00 2001
From: Chris Hines
Date: Fri, 1 May 2020 17:04:36 -0400
Subject: runtime: reduce timer latency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change the scheduler to treat expired timers with the same approach it
uses to steal runnable G's.

Previously the scheduler ignored timers on P's not marked for
preemption. That had the downside that any G's waiting on those expired
timers starved until the G running on their P completed or was
preempted. That could take as long as 20ms if sysmon was in a 10ms wake
up cycle.

In addition, a spinning P that ignored an expired timer and found no
other work would stop despite there being available work, missing the
opportunity for greater parallelism.

With this change the scheduler no longer ignores timers on
non-preemptable P's or relies on sysmon as a backstop to start threads
when timers expire. Instead it wakes an idle P, if needed, when creating
a new timer, because it cannot predict if the current P will have a
scheduling opportunity before the new timer expires. The P it wakes will
determine how long to sleep and block on the netpoller for the required
time, potentially stealing the new timer when it wakes.

This change also eliminates a race between a spinning P transitioning
to idle and concurrent timer creation, using the same pattern used for
submission of new goroutines in the same window.

Benchmark analysis:

CL 232199, which was included in Go 1.15, improved timer latency over
Go 1.14 by allowing P's to steal timers from P's not marked for
preemption. The benchmarks added in this CL measure that improvement in
the ParallelTimerLatency benchmark seen below. However, Go 1.15 still
relies on sysmon to notice expired timers in some situations, and sysmon
can sleep for up to 10ms before waking to check timers. This CL fixes
that shortcoming with only a modest regression on other benchmarks.
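The measurement idea behind the new benchmarks (shown in the diff below)
can be reduced to a few lines: record the expected wakeup time when a
timer is created, then report how late the callback actually runs. The
following standalone sketch is illustrative only and is not part of this
CL's diff; it simply mirrors what BenchmarkParallelTimerLatency does for
each timer:

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	func main() {
		const delay = time.Millisecond
		var wg sync.WaitGroup
		wg.Add(1)
		expectedWakeup := time.Now().Add(delay)
		time.AfterFunc(delay, func() {
			// Lateness of the timer relative to its requested deadline.
			late := time.Since(expectedWakeup)
			if late < 0 {
				late = 0
			}
			fmt.Println("timer lateness:", late)
			wg.Done()
		})
		wg.Wait()
	}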
name \ avg-late-ns                                        go14.time.bench  go15.time.bench  fix.time.bench
ParallelTimerLatency-8                                         17.3M ± 3%       7.9M ± 0%      0.2M ± 3%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-8        53.4k ±23%      50.7k ±31%     252.4k ± 9%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-8         204k ±14%        90k ±58%       188k ±12%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-8        1.17M ± 0%      0.11M ± 5%      0.11M ± 2%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-8        1.81M ±44%      0.10M ± 4%      0.10M ± 2%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-8        2.28M ±66%      0.09M ±13%      0.08M ±21%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-8        2.84M ±85%      0.07M ±15%      0.07M ±18%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-8        2.13M ±27%      0.06M ± 4%      0.06M ± 9%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-8        2.63M ± 6%      0.06M ±11%      0.06M ± 9%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-8        3.32M ±17%      0.06M ±16%      0.07M ±14%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-8       8.46M ±20%      4.37M ±21%      5.03M ±23%
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-8          1.02M ± 1%      0.20M ± 2%      0.20M ± 2%

name \ max-late-ns                                        go14.time.bench  go15.time.bench  fix.time.bench
ParallelTimerLatency-8                                         18.3M ± 1%       8.2M ± 0%      0.5M ±12%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-8         141k ±19%       127k ±19%      1129k ± 3%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-8        2.78M ± 4%      1.23M ±15%      1.26M ± 5%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-8        6.05M ± 5%      0.67M ±56%      0.81M ±33%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-8        7.93M ±20%      0.71M ±46%      0.76M ±41%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-8        9.41M ±30%      0.92M ±23%      0.81M ±44%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-8        10.8M ±42%       0.8M ±41%       0.8M ±30%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-8        9.62M ±24%      0.77M ±38%      0.88M ±27%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-8        10.6M ±10%       0.8M ±32%       0.7M ±27%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-8        11.9M ±36%       0.6M ±46%       0.8M ±38%
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-8       36.8M ±21%      24.7M ±21%      27.5M ±16%
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-8          2.12M ± 2%      1.02M ±11%      1.03M ± 7%

Other time benchmarks:

name \ time/op             go14.time.bench  go15.time.bench  fix.time.bench
AfterFunc-8                     137µs ± 4%       123µs ± 4%      131µs ± 2%
After-8                         212µs ± 3%       195µs ± 4%      204µs ± 7%
Stop-8                          165µs ± 6%       156µs ± 2%      151µs ±12%
SimultaneousAfterFunc-8         260µs ± 3%       248µs ± 3%      284µs ± 2%
StartStop-8                    65.8µs ± 9%      64.4µs ± 7%     67.3µs ±15%
Reset-8                        13.6µs ± 2%       9.6µs ± 2%      9.1µs ± 4%
Sleep-8                         307µs ± 4%       306µs ± 3%      320µs ± 2%
Ticker-8                       53.0µs ± 5%      54.5µs ± 5%     57.0µs ±11%
TickerReset-8                                   9.24µs ± 2%     9.51µs ± 3%
TickerResetNaive-8                               149µs ± 5%      145µs ± 5%

Fixes #38860
Updates #25471
Updates #27707

Change-Id: If52680509b0f3b66dbd1d0c13fa574bd2d0bbd57
Reviewed-on: https://go-review.googlesource.com/c/go/+/232298
Run-TryBot: Alberto Donizetti
TryBot-Result: Go Bot
Reviewed-by: Austin Clements
Trust: Ian Lance Taylor
---
 src/time/sleep_test.go | 187 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)

(limited to 'src/time/sleep_test.go')

diff --git a/src/time/sleep_test.go b/src/time/sleep_test.go
index f5678020b9..ba0016bf49 100644
--- a/src/time/sleep_test.go
+++ b/src/time/sleep_test.go
@@ -501,3 +501,190 @@ func TestZeroTimerStopPanics(t *testing.T) {
 	var tr Timer
 	tr.Stop()
 }
+
+// Benchmark timer latency when the thread that creates the timer is busy with
+// other work and the timers must be serviced by other threads.
+// https://golang.org/issue/38860
+func BenchmarkParallelTimerLatency(b *testing.B) {
+	gmp := runtime.GOMAXPROCS(0)
+	if gmp < 2 || runtime.NumCPU() < gmp {
+		b.Skip("skipping with GOMAXPROCS < 2 or NumCPU < GOMAXPROCS")
+	}
+
+	// allocate memory now to avoid GC interference later.
+	timerCount := gmp - 1
+	stats := make([]struct {
+		sum   float64
+		max   Duration
+		count int64
+		_     [5]int64 // cache line padding
+	}, timerCount)
+
+	// Ensure the time to start new threads to service timers will not pollute
+	// the results.
+	warmupScheduler(gmp)
+
+	// Note that other than the AfterFunc calls this benchmark is measuring it
+	// avoids using any other timers. In particular, the main goroutine uses
+	// doWork to spin for some durations because up through Go 1.15 if all
+	// threads are idle sysmon could leave deep sleep when we wake.
+
+	// Ensure sysmon is in deep sleep.
+	doWork(30 * Millisecond)
+
+	b.ResetTimer()
+
+	const delay = Millisecond
+	var wg sync.WaitGroup
+	var count int32
+	for i := 0; i < b.N; i++ {
+		wg.Add(timerCount)
+		atomic.StoreInt32(&count, 0)
+		for j := 0; j < timerCount; j++ {
+			j := j
+			expectedWakeup := Now().Add(delay)
+			AfterFunc(delay, func() {
+				late := Since(expectedWakeup)
+				if late < 0 {
+					late = 0
+				}
+				stats[j].count++
+				stats[j].sum += float64(late.Nanoseconds())
+				if late > stats[j].max {
+					stats[j].max = late
+				}
+				atomic.AddInt32(&count, 1)
+				for atomic.LoadInt32(&count) < int32(timerCount) {
+					// spin until all timers fired
+				}
+				wg.Done()
+			})
+		}
+
+		for atomic.LoadInt32(&count) < int32(timerCount) {
+			// spin until all timers fired
+		}
+		wg.Wait()
+
+		// Spin for a bit to let the other scheduler threads go idle before the
+		// next round.
+		doWork(Millisecond)
+	}
+	var total float64
+	var samples float64
+	max := Duration(0)
+	for _, s := range stats {
+		if s.max > max {
+			max = s.max
+		}
+		total += s.sum
+		samples += float64(s.count)
+	}
+	b.ReportMetric(0, "ns/op")
+	b.ReportMetric(total/samples, "avg-late-ns")
+	b.ReportMetric(float64(max.Nanoseconds()), "max-late-ns")
+}
+
+// Benchmark timer latency with staggered wakeup times and varying CPU bound
+// workloads. https://golang.org/issue/38860
+func BenchmarkStaggeredTickerLatency(b *testing.B) {
+	gmp := runtime.GOMAXPROCS(0)
+	if gmp < 2 || runtime.NumCPU() < gmp {
+		b.Skip("skipping with GOMAXPROCS < 2 or NumCPU < GOMAXPROCS")
+	}
+
+	const delay = 3 * Millisecond
+
+	for _, dur := range []Duration{300 * Microsecond, 2 * Millisecond} {
+		b.Run(fmt.Sprintf("work-dur=%s", dur), func(b *testing.B) {
+			for tickersPerP := 1; tickersPerP < int(delay/dur)+1; tickersPerP++ {
+				tickerCount := gmp * tickersPerP
+				b.Run(fmt.Sprintf("tickers-per-P=%d", tickersPerP), func(b *testing.B) {
+					// allocate memory now to avoid GC interference later.
+					stats := make([]struct {
+						sum   float64
+						max   Duration
+						count int64
+						_     [5]int64 // cache line padding
+					}, tickerCount)
+
+					// Ensure the time to start new threads to service timers
+					// will not pollute the results.
+					warmupScheduler(gmp)
+
+					b.ResetTimer()
+
+					var wg sync.WaitGroup
+					wg.Add(tickerCount)
+					for j := 0; j < tickerCount; j++ {
+						j := j
+						doWork(delay / Duration(gmp))
+						expectedWakeup := Now().Add(delay)
+						ticker := NewTicker(delay)
+						go func(c int, ticker *Ticker, firstWake Time) {
+							defer ticker.Stop()
+
+							for ; c > 0; c-- {
+								<-ticker.C
+								late := Since(expectedWakeup)
+								if late < 0 {
+									late = 0
+								}
+								stats[j].count++
+								stats[j].sum += float64(late.Nanoseconds())
+								if late > stats[j].max {
+									stats[j].max = late
+								}
+								expectedWakeup = expectedWakeup.Add(delay)
+								doWork(dur)
+							}
+							wg.Done()
+						}(b.N, ticker, expectedWakeup)
+					}
+					wg.Wait()
+
+					var total float64
+					var samples float64
+					max := Duration(0)
+					for _, s := range stats {
+						if s.max > max {
+							max = s.max
+						}
+						total += s.sum
+						samples += float64(s.count)
+					}
+					b.ReportMetric(0, "ns/op")
+					b.ReportMetric(total/samples, "avg-late-ns")
+					b.ReportMetric(float64(max.Nanoseconds()), "max-late-ns")
+				})
+			}
+		})
+	}
+}
+
+// warmupScheduler ensures the scheduler has at least targetThreadCount threads
+// in its thread pool.
+func warmupScheduler(targetThreadCount int) {
+	var wg sync.WaitGroup
+	var count int32
+	for i := 0; i < targetThreadCount; i++ {
+		wg.Add(1)
+		go func() {
+			atomic.AddInt32(&count, 1)
+			for atomic.LoadInt32(&count) < int32(targetThreadCount) {
+				// spin until all threads started
+			}
+
+			// spin a bit more to ensure they are all running on separate CPUs.
+			doWork(Millisecond)
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+}
+
+func doWork(dur Duration) {
+	start := Now()
+	for Since(start) < dur {
+	}
+}
--
cgit v1.3


From b78b427be5e4c8a51a2b01b39c1ce6c4f39a93dc Mon Sep 17 00:00:00 2001
From: Michael Pratt
Date: Wed, 2 Dec 2020 12:19:13 -0500
Subject: runtime, time: strictly enforce when, period constraints

timer.when must always be positive. addtimer and modtimer already check
that it is non-negative; we expand that check to also reject zero. Also
upgrade from pinning bad values to throwing, as these values shouldn't
be possible to pass (except as below).

timeSleep may overflow timer.nextwhen. This would previously have been
pinned by resetForSleep; now we fix it manually.

runOneTimer may overflow timer.when when adding timer.period. Detect
this and pin to maxWhen.

addtimer is now too strict to allow TestOverflowRuntimeTimer to test an
overflowed timer. Such a timer should not be possible; to help guard
against accidental inclusion, siftup / siftdown now check timers as they
go. TestOverflowRuntimeTimer has been replaced with tests for period and
sleep overflows.

Change-Id: I17f9739e27ebcb20d87945c635050316fb8e9226
Reviewed-on: https://go-review.googlesource.com/c/go/+/274853
Trust: Michael Pratt
Reviewed-by: Michael Knyszek
Reviewed-by: Ian Lance Taylor
---
 src/runtime/time.go       | 31 +++++++++++++++++++++++++------
 src/time/internal_test.go | 42 +++++++++++++++++-------------------------
 src/time/sleep.go         |  2 ++
 src/time/sleep_test.go    | 23 ++++++++++++++++-------
 4 files changed, 60 insertions(+), 38 deletions(-)

(limited to 'src/time/sleep_test.go')

diff --git a/src/runtime/time.go b/src/runtime/time.go
index 83d93c5686..d338705b7c 100644
--- a/src/runtime/time.go
+++ b/src/runtime/time.go
@@ -187,6 +187,9 @@ func timeSleep(ns int64) {
 	t.f = goroutineReady
 	t.arg = gp
 	t.nextwhen = nanotime() + ns
+	if t.nextwhen < 0 { // check for overflow.
+		t.nextwhen = maxWhen
+	}
 	gopark(resetForSleep, unsafe.Pointer(t), waitReasonSleep, traceEvGoSleep, 1)
 }
@@ -244,10 +247,14 @@ func goroutineReady(arg interface{}, seq uintptr) {
 // That avoids the risk of changing the when field of a timer in some P's heap,
 // which could cause the heap to become unsorted.
 func addtimer(t *timer) {
-	// when must never be negative; otherwise runtimer will overflow
-	// during its delta calculation and never expire other runtime timers.
-	if t.when < 0 {
-		t.when = maxWhen
+	// when must be positive. A negative value will cause runtimer to
+	// overflow during its delta calculation and never expire other runtime
+	// timers. Zero will cause checkTimers to fail to notice the timer.
+	if t.when <= 0 {
+		throw("timer when must be positive")
+	}
+	if t.period < 0 {
+		throw("timer period must be non-negative")
 	}
 	if t.status != timerNoStatus {
 		throw("addtimer called with initialized timer")
@@ -408,8 +415,11 @@ func dodeltimer0(pp *p) {
 // This is called by the netpoll code or time.Ticker.Reset or time.Timer.Reset.
 // Reports whether the timer was modified before it was run.
 func modtimer(t *timer, when, period int64, f func(interface{}, uintptr), arg interface{}, seq uintptr) bool {
-	if when < 0 {
-		when = maxWhen
+	if when <= 0 {
+		throw("timer when must be positive")
+	}
+	if period < 0 {
+		throw("timer period must be non-negative")
 	}
 
 	status := uint32(timerNoStatus)
@@ -848,6 +858,9 @@ func runOneTimer(pp *p, t *timer, now int64) {
 		// Leave in heap but adjust next time to fire.
 		delta := t.when - now
 		t.when += t.period * (1 + -delta/t.period)
+		if t.when < 0 { // check for overflow.
+			t.when = maxWhen
+		}
 		siftdownTimer(pp.timers, 0)
 		if !atomic.Cas(&t.status, timerRunning, timerWaiting) {
 			badTimer()
@@ -1066,6 +1079,9 @@ func siftupTimer(t []*timer, i int) {
 		badTimer()
 	}
 	when := t[i].when
+	if when <= 0 {
+		badTimer()
+	}
 	tmp := t[i]
 	for i > 0 {
 		p := (i - 1) / 4 // parent
@@ -1086,6 +1102,9 @@ func siftdownTimer(t []*timer, i int) {
 		badTimer()
 	}
 	when := t[i].when
+	if when <= 0 {
+		badTimer()
+	}
 	tmp := t[i]
 	for {
 		c := i*4 + 1 // left child
diff --git a/src/time/internal_test.go b/src/time/internal_test.go
index e70b6f34de..ffe54e47c2 100644
--- a/src/time/internal_test.go
+++ b/src/time/internal_test.go
@@ -33,38 +33,30 @@ var DaysIn = daysIn
 
 func empty(arg interface{}, seq uintptr) {}
 
-// Test that a runtimeTimer with a duration so large it overflows
-// does not cause other timers to hang.
+// Test that a runtimeTimer with a period that would overflow when on
+// expiration does not throw or cause other timers to hang.
 //
 // This test has to be in internal_test.go since it fiddles with
 // unexported data structures.
-func CheckRuntimeTimerOverflow() {
-	// We manually create a runtimeTimer to bypass the overflow
-	// detection logic in NewTimer: we're testing the underlying
-	// runtime.addtimer function.
+func CheckRuntimeTimerPeriodOverflow() {
+	// We manually create a runtimeTimer with huge period, but that expires
+	// immediately. The public Timer interface would require waiting for
+	// the entire period before the first update.
 	r := &runtimeTimer{
-		when: runtimeNano() + (1<<63 - 1),
-		f:    empty,
-		arg:  nil,
+		when:   runtimeNano(),
+		period: 1<<63 - 1,
+		f:      empty,
+		arg:    nil,
 	}
 	startTimer(r)
+	defer stopTimer(r)
 
-	// Start a goroutine that should send on t.C right away.
-	t := NewTimer(1)
-
-	defer func() {
-		stopTimer(r)
-		t.Stop()
-	}()
-
-	// If the test fails, we will hang here until the timeout in the
-	// testing package fires, which is 10 minutes. It would be nice to
-	// catch the problem sooner, but there is no reliable way to guarantee
-	// that timers are run without doing something involving the scheduler.
-	// Previous failed attempts have tried calling runtime.Gosched and
-	// runtime.GC, but neither is reliable. So we fall back to hope:
-	// We hope we don't hang here.
-	<-t.C
+	// If this test fails, we will either throw (when siftdownTimer detects
+	// bad when on update), or other timers will hang (if the timer in a
+	// heap is in a bad state). There is no reliable way to test this, but
+	// we wait on a short timer here as a smoke test (alternatively, timers
+	// in later tests may hang).
+	<-After(25 * Millisecond)
 }
 
 var (
diff --git a/src/time/sleep.go b/src/time/sleep.go
index 22ffd68282..90d8a18a68 100644
--- a/src/time/sleep.go
+++ b/src/time/sleep.go
@@ -31,6 +31,8 @@ func when(d Duration) int64 {
 	}
 	t := runtimeNano() + int64(d)
 	if t < 0 {
+		// N.B. runtimeNano() and d are always positive, so addition
+		// (including overflow) will never result in t == 0.
 		t = 1<<63 - 1 // math.MaxInt64
 	}
 	return t
diff --git a/src/time/sleep_test.go b/src/time/sleep_test.go
index ba0016bf49..084ac33f51 100644
--- a/src/time/sleep_test.go
+++ b/src/time/sleep_test.go
@@ -434,17 +434,29 @@ func TestReset(t *testing.T) {
 	t.Error(err)
 }
 
-// Test that sleeping for an interval so large it overflows does not
-// result in a short sleep duration.
+// Test that sleeping (via Sleep or Timer) for an interval so large it
+// overflows does not result in a short sleep duration. Nor does it interfere
+// with execution of other timers. If it does, timers in this or subsequent
+// tests may not fire.
 func TestOverflowSleep(t *testing.T) {
 	const big = Duration(int64(1<<63 - 1))
+
+	go func() {
+		Sleep(big)
+		// On failure, this may return after the test has completed, so
+		// we need to panic instead.
+		panic("big sleep returned")
+	}()
+
 	select {
 	case <-After(big):
 		t.Fatalf("big timeout fired")
 	case <-After(25 * Millisecond):
 		// OK
 	}
+
 	const neg = Duration(-1 << 63)
+	Sleep(neg) // Returns immediately.
 	select {
 	case <-After(neg):
 		// OK
@@ -473,13 +485,10 @@ func TestIssue5745(t *testing.T) {
 	t.Error("Should be unreachable.")
 }
 
-func TestOverflowRuntimeTimer(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping in short mode, see issue 6874")
-	}
+func TestOverflowPeriodRuntimeTimer(t *testing.T) {
 	// This may hang forever if timers are broken. See comment near
 	// the end of CheckRuntimeTimerOverflow in internal_test.go.
-	CheckRuntimeTimerOverflow()
+	CheckRuntimeTimerPeriodOverflow()
 }
 
 func checkZeroPanicString(t *testing.T) {
--
cgit v1.3
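
The overflow handling added above in timeSleep and runOneTimer follows one
pattern: compute the new deadline with ordinary signed addition, and if the
result wrapped to a negative value, pin it to the largest representable
deadline. The snippet below is a standalone, illustrative sketch of that
pattern, not runtime code; maxWhen and nextWhen here are stand-ins for the
runtime's unexported constant and for the inline checks in the diff.

	package main

	import (
		"fmt"
		"math"
		"time"
	)

	// maxWhen stands in for the runtime's unexported maxWhen constant
	// (math.MaxInt64, the latest representable timer deadline).
	const maxWhen = math.MaxInt64

	// nextWhen returns now+d as a timer deadline, pinning to maxWhen when
	// the signed addition wraps to a negative value, mirroring the checks
	// this change adds in timeSleep and runOneTimer.
	func nextWhen(now, d int64) int64 {
		when := now + d
		if when < 0 { // check for overflow.
			when = maxWhen
		}
		return when
	}

	func main() {
		now := time.Now().UnixNano()
		fmt.Println(nextWhen(now, int64(time.Second))) // normal case
		fmt.Println(nextWhen(now, math.MaxInt64))      // overflows and is pinned
	}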