aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/runtime_test.go
diff options
context:
space:
mode:
authorRhys Hiltner <rhys.hiltner@gmail.com>2024-07-31 13:45:53 -0700
committerGopher Robot <gobot@golang.org>2024-08-02 21:00:51 +0000
commite8776e19b93dc59860dad2f23da68d373b3fea39 (patch)
tree0606ce2431d9e89be3f369d3f6d7330e65e24822 /src/runtime/runtime_test.go
parentaac7106cb9b6e59cb16f1c0a1123a67669a4ed2d (diff)
downloadgo-e8776e19b93dc59860dad2f23da68d373b3fea39.tar.xz
runtime: benchmark mutex handoffs
The speed of handing off a mutex to a waiting thread is sensitive to the configuration of the spinning section of lock2. Measure that latency directly, to complement our existing benchmarks of mutex throughput. For #68578 Change-Id: I7637684bcff62eb05cc008491f095f653d13af4b Reviewed-on: https://go-review.googlesource.com/c/go/+/602176 Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Auto-Submit: Rhys Hiltner <rhys.hiltner@gmail.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/runtime_test.go')
-rw-r--r--src/runtime/runtime_test.go110
1 files changed, 110 insertions, 0 deletions
diff --git a/src/runtime/runtime_test.go b/src/runtime/runtime_test.go
index 0b51dd8c8d..6004649ee9 100644
--- a/src/runtime/runtime_test.go
+++ b/src/runtime/runtime_test.go
@@ -7,6 +7,8 @@ package runtime_test
import (
"flag"
"fmt"
+ "internal/cpu"
+ "internal/runtime/atomic"
"io"
. "runtime"
"runtime/debug"
@@ -561,3 +563,111 @@ func BenchmarkOSYield(b *testing.B) {
OSYield()
}
}
+
+func BenchmarkMutexHandoff(b *testing.B) {
+ testcase := func(delay func(l *Mutex)) func(b *testing.B) {
+ return func(b *testing.B) {
+ if workers := 2; GOMAXPROCS(0) < workers {
+ b.Skipf("requires GOMAXPROCS >= %d", workers)
+ }
+
+ // Measure latency of mutex handoff between threads.
+ //
+ // Hand off a runtime.mutex between two threads, one running a
+ // "coordinator" goroutine and the other running a "worker"
+ // goroutine. We don't override the runtime's typical
+ // goroutine/thread mapping behavior.
+ //
+ // Measure the latency, starting when the coordinator enters a call
+ // to runtime.unlock and ending when the worker's call to
+ // runtime.lock returns. The benchmark can specify a "delay"
+ // function to simulate the length of the mutex-holder's critical
+ // section, including to arrange for the worker's thread to be in
+ // either the "spinning" or "sleeping" portions of the runtime.lock2
+ // implementation. Measurement starts after any such "delay".
+ //
+ // The two threads' goroutines communicate their current position to
+ // each other in a non-blocking way via the "turn" state.
+
+ var state struct {
+ _ [cpu.CacheLinePadSize]byte
+ lock Mutex
+ _ [cpu.CacheLinePadSize]byte
+ turn atomic.Int64
+ _ [cpu.CacheLinePadSize]byte
+ }
+
+ var delta atomic.Int64
+ var wg sync.WaitGroup
+
+ // coordinator:
+ // - acquire the mutex
+ // - set the turn to 2 mod 4, instructing the worker to begin its Lock call
+ // - wait until the mutex is contended
+ // - wait a bit more so the worker can commit to its sleep
+ // - release the mutex and wait for it to be our turn (0 mod 4) again
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ var t int64
+ for range b.N {
+ Lock(&state.lock)
+ state.turn.Add(2)
+ delay(&state.lock)
+ t -= Nanotime() // start the timer
+ Unlock(&state.lock)
+ for state.turn.Load()&0x2 != 0 {
+ }
+ }
+ state.turn.Add(1)
+ delta.Add(t)
+ }()
+
+ // worker:
+ // - wait until its our turn (2 mod 4)
+ // - acquire and release the mutex
+ // - switch the turn counter back to the coordinator (0 mod 4)
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ var t int64
+ for {
+ switch state.turn.Load() & 0x3 {
+ case 0:
+ case 1, 3:
+ delta.Add(t)
+ return
+ case 2:
+ Lock(&state.lock)
+ t += Nanotime() // stop the timer
+ Unlock(&state.lock)
+ state.turn.Add(2)
+ }
+ }
+ }()
+
+ wg.Wait()
+ b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
+ }
+ }
+
+ b.Run("Solo", func(b *testing.B) {
+ var lock Mutex
+ for range b.N {
+ Lock(&lock)
+ Unlock(&lock)
+ }
+ })
+
+ b.Run("FastPingPong", testcase(func(l *Mutex) {}))
+ b.Run("SlowPingPong", testcase(func(l *Mutex) {
+ // Wait for the worker to stop spinning and prepare to sleep
+ for !MutexContended(l) {
+ }
+ // Wait a bit longer so the OS can finish committing the worker to its
+ // sleep. Balance consistency against getting enough iterations.
+ const extraNs = 10e3
+ for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
+ }
+ }))
+}