diff options
| author | Aliaksandr Valialkin <valyala@gmail.com> | 2017-01-06 15:44:17 +0200 |
|---|---|---|
| committer | Ian Lance Taylor <iant@golang.org> | 2017-09-12 16:52:23 +0000 |
| commit | 76f4fd8a5251b4f63ea14a3c1e2fe2e78eb74f81 (patch) | |
| tree | cb06b397976e06bb2b62f171517c5de484e567fa /src/runtime | |
| parent | 7377d0c7e972397969382315df915579e32db025 (diff) | |
| download | go-76f4fd8a5251b4f63ea14a3c1e2fe2e78eb74f81.tar.xz | |
runtime: improve timers scalability on multi-CPU systems
Use per-P timers, so each P may work with its own timers.
This CL improves performance on multi-CPU systems
in the following cases:
- When serving high number of concurrent connections
with read/write deadlines set (for instance, highly loaded
net/http server).
- When using high number of concurrent timers. These timers
may be implicitly created via context.WithDeadline
or context.WithTimeout.
Production servers should usually set timeout on connections
and external requests in order to prevent from resource leakage.
See https://blog.cloudflare.com/the-complete-guide-to-golang-net-http-timeouts/
Below are relevant benchmark results for various GOMAXPROCS values
on linux/amd64:
context package:
name old time/op new time/op delta
WithTimeout/concurrency=40 4.92µs ± 0% 5.17µs ± 1% +5.07% (p=0.000 n=9+9)
WithTimeout/concurrency=4000 6.03µs ± 1% 6.49µs ± 0% +7.63% (p=0.000 n=8+10)
WithTimeout/concurrency=400000 8.58µs ± 7% 9.02µs ± 4% +5.02% (p=0.019 n=10+10)
name old time/op new time/op delta
WithTimeout/concurrency=40-2 3.70µs ± 1% 2.78µs ± 4% -24.90% (p=0.000 n=8+9)
WithTimeout/concurrency=4000-2 4.49µs ± 4% 3.67µs ± 5% -18.26% (p=0.000 n=10+10)
WithTimeout/concurrency=400000-2 6.16µs ±10% 5.15µs ±13% -16.30% (p=0.000 n=10+10)
name old time/op new time/op delta
WithTimeout/concurrency=40-4 3.58µs ± 1% 2.64µs ± 2% -26.13% (p=0.000 n=9+10)
WithTimeout/concurrency=4000-4 4.17µs ± 0% 3.32µs ± 1% -20.36% (p=0.000 n=10+10)
WithTimeout/concurrency=400000-4 5.57µs ± 9% 4.83µs ±10% -13.27% (p=0.001 n=10+10)
time package:
name old time/op new time/op delta
AfterFunc 6.15ms ± 3% 6.07ms ± 2% ~ (p=0.133 n=10+9)
AfterFunc-2 3.43ms ± 1% 3.56ms ± 1% +3.91% (p=0.000 n=10+9)
AfterFunc-4 5.04ms ± 2% 2.36ms ± 0% -53.20% (p=0.000 n=10+9)
After 6.54ms ± 2% 6.49ms ± 3% ~ (p=0.393 n=10+10)
After-2 3.68ms ± 1% 3.87ms ± 0% +5.14% (p=0.000 n=9+9)
After-4 6.66ms ± 1% 2.87ms ± 1% -56.89% (p=0.000 n=10+10)
Stop 698µs ± 2% 689µs ± 1% -1.26% (p=0.011 n=10+10)
Stop-2 729µs ± 2% 434µs ± 3% -40.49% (p=0.000 n=10+10)
Stop-4 837µs ± 3% 333µs ± 2% -60.20% (p=0.000 n=10+10)
SimultaneousAfterFunc 694µs ± 1% 692µs ± 7% ~ (p=0.481 n=10+10)
SimultaneousAfterFunc-2 714µs ± 3% 569µs ± 2% -20.33% (p=0.000 n=10+10)
SimultaneousAfterFunc-4 782µs ± 2% 386µs ± 2% -50.67% (p=0.000 n=10+10)
StartStop 267µs ± 3% 274µs ± 0% +2.64% (p=0.000 n=8+9)
StartStop-2 238µs ± 2% 140µs ± 3% -40.95% (p=0.000 n=10+8)
StartStop-4 320µs ± 1% 125µs ± 1% -61.02% (p=0.000 n=9+9)
Reset 75.0µs ± 1% 77.5µs ± 2% +3.38% (p=0.000 n=10+10)
Reset-2 150µs ± 2% 40µs ± 5% -73.09% (p=0.000 n=10+9)
Reset-4 226µs ± 1% 33µs ± 1% -85.42% (p=0.000 n=10+10)
Sleep 857µs ± 6% 878µs ± 9% ~ (p=0.079 n=10+9)
Sleep-2 617µs ± 4% 585µs ± 2% -5.21% (p=0.000 n=10+10)
Sleep-4 689µs ± 3% 465µs ± 4% -32.53% (p=0.000 n=10+10)
Ticker 55.9ms ± 2% 55.9ms ± 2% ~ (p=0.971 n=10+10)
Ticker-2 28.7ms ± 2% 28.1ms ± 1% -2.06% (p=0.000 n=10+10)
Ticker-4 14.6ms ± 0% 13.6ms ± 1% -6.80% (p=0.000 n=9+10)
Fixes #15133
Change-Id: I6f4b09d2db8c5bec93146db6501b44dbfe5c0ac4
Reviewed-on: https://go-review.googlesource.com/34784
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/runtime')
| -rw-r--r-- | src/runtime/proc.go | 12 | ||||
| -rw-r--r-- | src/runtime/time.go | 229 | ||||
| -rw-r--r-- | src/runtime/trace.go | 9 |
3 files changed, 162 insertions, 88 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go index 21fff7de5d..98753551a9 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -3863,15 +3863,11 @@ func sysmon() { } shouldRelax := true if osRelaxMinNS > 0 { - lock(&timers.lock) - if timers.sleeping { - now := nanotime() - next := timers.sleepUntil - if next-now < osRelaxMinNS { - shouldRelax = false - } + next := timeSleepUntil() + now := nanotime() + if next-now < osRelaxMinNS { + shouldRelax = false } - unlock(&timers.lock) } if shouldRelax { osRelax(true) diff --git a/src/runtime/time.go b/src/runtime/time.go index abf200d7d3..6bfa6ba160 100644 --- a/src/runtime/time.go +++ b/src/runtime/time.go @@ -6,14 +6,18 @@ package runtime -import "unsafe" +import ( + "runtime/internal/sys" + "unsafe" +) // Package time knows the layout of this structure. // If this struct changes, adjust ../time/sleep.go:/runtimeTimer. // For GOOS=nacl, package syscall knows the layout of this structure. // If this struct changes, adjust ../syscall/net_nacl.go:/runtimeTimer. type timer struct { - i int // heap index + tb *timersBucket // the bucket the timer lives in + i int // heap index // Timer wakes up at when, and then at when+period, ... (period > 0 only) // each time calling f(arg, now) in the timer goroutine, so f must be @@ -25,7 +29,37 @@ type timer struct { seq uintptr } -var timers struct { +// timersLen is the length of timers array. +// +// Ideally, this would be set to GOMAXPROCS, but that would require +// dynamic reallocation +// +// The current value is a compromise between memory usage and performance +// that should cover the majority of GOMAXPROCS values used in the wild. +const timersLen = 64 + +// timers contains "per-P" timer heaps. +// +// Timers are queued into timersBucket associated with the current P, +// so each P may work with its own timers independently of other P instances. +// +// Each timersBucket may be associated with multiple P +// if GOMAXPROCS > timersLen. +var timers [timersLen]struct { + timersBucket + + // The padding should eliminate false sharing + // between timersBucket values. + pad [sys.CacheLineSize - unsafe.Sizeof(timersBucket{})%sys.CacheLineSize]byte +} + +func (t *timer) assignBucket() *timersBucket { + id := uint8(getg().m.p.ptr().id) % timersLen + t.tb = &timers[id].timersBucket + return t.tb +} + +type timersBucket struct { lock mutex gp *g created bool @@ -51,18 +85,20 @@ func timeSleep(ns int64) { return } - t := getg().timer + gp := getg() + t := gp.timer if t == nil { t = new(timer) - getg().timer = t + gp.timer = t } *t = timer{} t.when = nanotime() + ns t.f = goroutineReady - t.arg = getg() - lock(&timers.lock) - addtimerLocked(t) - goparkunlock(&timers.lock, "sleep", traceEvGoSleep, 2) + t.arg = gp + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + goparkunlock(&tb.lock, "sleep", traceEvGoSleep, 2) } // startTimer adds t to the timer heap. @@ -89,87 +125,86 @@ func goroutineReady(arg interface{}, seq uintptr) { } func addtimer(t *timer) { - lock(&timers.lock) - addtimerLocked(t) - unlock(&timers.lock) + tb := t.assignBucket() + lock(&tb.lock) + tb.addtimerLocked(t) + unlock(&tb.lock) } // Add a timer to the heap and start or kick timerproc if the new timer is // earlier than any of the others. // Timers are locked. -func addtimerLocked(t *timer) { +func (tb *timersBucket) addtimerLocked(t *timer) { // when must never be negative; otherwise timerproc will overflow // during its delta calculation and never expire other runtime timers. if t.when < 0 { t.when = 1<<63 - 1 } - t.i = len(timers.t) - timers.t = append(timers.t, t) - siftupTimer(t.i) + t.i = len(tb.t) + tb.t = append(tb.t, t) + tb.siftupTimer(t.i) if t.i == 0 { // siftup moved to top: new earliest deadline. - if timers.sleeping { - timers.sleeping = false - notewakeup(&timers.waitnote) + if tb.sleeping { + tb.sleeping = false + notewakeup(&tb.waitnote) } - if timers.rescheduling { - timers.rescheduling = false - goready(timers.gp, 0) + if tb.rescheduling { + tb.rescheduling = false + goready(tb.gp, 0) } } - if !timers.created { - timers.created = true - go timerproc() + if !tb.created { + tb.created = true + go timerproc(tb) } } // Delete timer t from the heap. // Do not need to update the timerproc: if it wakes up early, no big deal. func deltimer(t *timer) bool { - // Dereference t so that any panic happens before the lock is held. - // Discard result, because t might be moving in the heap. - _ = t.i + tb := t.tb - lock(&timers.lock) + lock(&tb.lock) // t may not be registered anymore and may have // a bogus i (typically 0, if generated by Go). // Verify it before proceeding. i := t.i - last := len(timers.t) - 1 - if i < 0 || i > last || timers.t[i] != t { - unlock(&timers.lock) + last := len(tb.t) - 1 + if i < 0 || i > last || tb.t[i] != t { + unlock(&tb.lock) return false } if i != last { - timers.t[i] = timers.t[last] - timers.t[i].i = i + tb.t[i] = tb.t[last] + tb.t[i].i = i } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if i != last { - siftupTimer(i) - siftdownTimer(i) + tb.siftupTimer(i) + tb.siftdownTimer(i) } - unlock(&timers.lock) + unlock(&tb.lock) return true } // Timerproc runs the time-driven events. -// It sleeps until the next event in the timers heap. +// It sleeps until the next event in the tb heap. // If addtimer inserts a new earlier event, it wakes timerproc early. -func timerproc() { - timers.gp = getg() +func timerproc(tb *timersBucket) { + tb.gp = getg() for { - lock(&timers.lock) - timers.sleeping = false + lock(&tb.lock) + tb.sleeping = false now := nanotime() delta := int64(-1) for { - if len(timers.t) == 0 { + if len(tb.t) == 0 { delta = -1 break } - t := timers.t[0] + t := tb.t[0] delta = t.when - now if delta > 0 { break @@ -177,43 +212,43 @@ func timerproc() { if t.period > 0 { // leave in heap but adjust next time to fire t.when += t.period * (1 + -delta/t.period) - siftdownTimer(0) + tb.siftdownTimer(0) } else { // remove from heap - last := len(timers.t) - 1 + last := len(tb.t) - 1 if last > 0 { - timers.t[0] = timers.t[last] - timers.t[0].i = 0 + tb.t[0] = tb.t[last] + tb.t[0].i = 0 } - timers.t[last] = nil - timers.t = timers.t[:last] + tb.t[last] = nil + tb.t = tb.t[:last] if last > 0 { - siftdownTimer(0) + tb.siftdownTimer(0) } t.i = -1 // mark as removed } f := t.f arg := t.arg seq := t.seq - unlock(&timers.lock) + unlock(&tb.lock) if raceenabled { raceacquire(unsafe.Pointer(t)) } f(arg, seq) - lock(&timers.lock) + lock(&tb.lock) } if delta < 0 || faketime > 0 { // No timers left - put goroutine to sleep. - timers.rescheduling = true - goparkunlock(&timers.lock, "timer goroutine (idle)", traceEvGoBlock, 1) + tb.rescheduling = true + goparkunlock(&tb.lock, "timer goroutine (idle)", traceEvGoBlock, 1) continue } // At least one timer pending. Sleep until then. - timers.sleeping = true - timers.sleepUntil = now + delta - noteclear(&timers.waitnote) - unlock(&timers.lock) - notetsleepg(&timers.waitnote, delta) + tb.sleeping = true + tb.sleepUntil = now + delta + noteclear(&tb.waitnote) + unlock(&tb.lock) + notetsleepg(&tb.waitnote, delta) } } @@ -222,28 +257,68 @@ func timejump() *g { return nil } - lock(&timers.lock) - if !timers.created || len(timers.t) == 0 { - unlock(&timers.lock) + for i := range timers { + lock(&timers[i].lock) + } + gp := timejumpLocked() + for i := range timers { + unlock(&timers[i].lock) + } + + return gp +} + +func timejumpLocked() *g { + // Determine a timer bucket with minimum when. + var minT *timer + for i := range timers { + tb := &timers[i] + if !tb.created || len(tb.t) == 0 { + continue + } + t := tb.t[0] + if minT == nil || t.when < minT.when { + minT = t + } + } + if minT == nil || minT.when <= faketime { + return nil + } + + faketime = minT.when + tb := minT.tb + if !tb.rescheduling { return nil } + tb.rescheduling = false + return tb.gp +} + +func timeSleepUntil() int64 { + next := int64(1<<63 - 1) - var gp *g - if faketime < timers.t[0].when { - faketime = timers.t[0].when - if timers.rescheduling { - timers.rescheduling = false - gp = timers.gp + // Determine minimum sleepUntil across all the timer buckets. + // + // The function can not return a precise answer, + // as another timer may pop in as soon as timers have been unlocked. + // So lock the timers one by one instead of all at once. + for i := range timers { + tb := &timers[i] + + lock(&tb.lock) + if tb.sleeping && tb.sleepUntil < next { + next = tb.sleepUntil } + unlock(&tb.lock) } - unlock(&timers.lock) - return gp + + return next } // Heap maintenance algorithms. -func siftupTimer(i int) { - t := timers.t +func (tb *timersBucket) siftupTimer(i int) { + t := tb.t when := t[i].when tmp := t[i] for i > 0 { @@ -259,8 +334,8 @@ func siftupTimer(i int) { } } -func siftdownTimer(i int) { - t := timers.t +func (tb *timersBucket) siftdownTimer(i int) { + t := tb.t n := len(t) when := t[i].when tmp := t[i] diff --git a/src/runtime/trace.go b/src/runtime/trace.go index a2eb0ba8c3..50e4c73c83 100644 --- a/src/runtime/trace.go +++ b/src/runtime/trace.go @@ -408,9 +408,12 @@ func ReadTrace() []byte { var data []byte data = append(data, traceEvFrequency|0<<traceArgCountShift) data = traceAppend(data, uint64(freq)) - if timers.gp != nil { - data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) - data = traceAppend(data, uint64(timers.gp.goid)) + for i := range timers { + tb := &timers[i] + if tb.gp != nil { + data = append(data, traceEvTimerGoroutine|0<<traceArgCountShift) + data = traceAppend(data, uint64(tb.gp.goid)) + } } // This will emit a bunch of full buffers, we will pick them up // on the next iteration. |
