runtime: change mutex profile to count every blocked goroutine

The pprof mutex profile was meant to match the Google C++ (now Abseil) mutex profiler, originally designed and implemented by Mike Burrows. When we worked on the Go version, pjw and I missed that C++ counts the time each thread is blocked, even if multiple threads are blocked on a mutex. That is, if 100 threads are blocked on the same mutex for the same 10ms, that still counts as 1000ms of contention in C++. In Go, to date, /debug/pprof/mutex has counted that as only 10ms of contention. If 100 goroutines are blocked on one mutex and only 1 goroutine is blocked on another mutex, we probably do want to see the first mutex as being more contended, so the Abseil approach is the more useful one. This CL adopts "contention scales with number of goroutines blocked", to better match Abseil [1]. However, it still makes sure to attribute the time to the unlock that caused the backup, not subsequent innocent unlocks that were affected by the congestion. In this way it still gives more accurate profiles than Abseil does. [1] https://github.com/abseil/abseil-cpp/blob/lts_2023_01_25/absl/synchronization/mutex.cc#L2390 Fixes #61015. Change-Id: I7eb9e706867ffa8c0abb5b26a1b448f6eba49331 Reviewed-on: https://go-review.googlesource.com/c/go/+/506415 Run-TryBot: Russ Cox <rsc@golang.org> Auto-Submit: Russ Cox <rsc@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Michael Knyszek <mknyszek@google.com>
author: Russ Cox <rsc@golang.org> 2023-06-26 17:12:44 -0400
committer: Gopher Robot <gobot@golang.org> 2023-08-17 01:30:31 +0000
commit: 1c0035401358c8bfc2ff646b1d950da5fcd6b355 (patch)
tree: b446e570234778b0d89924fd70ab7a16b4542411 /src/runtime/pprof
parent: 14a3ffc3d2165ef03c3ffd0037a4fa6dbb776026 (diff)
download: go-1c0035401358c8bfc2ff646b1d950da5fcd6b355.tar.xz
1 files changed, 58 insertions, 16 deletions
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 1ade860441..9480240615 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -1023,7 +1023,7 @@ func containsStack(got [][]string, want []string) bool {
 // awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump
 // shows a goroutine in the given state with a stack frame in
 // runtime/pprof.<fName>.
-func awaitBlockedGoroutine(t *testing.T, state, fName string) {
+func awaitBlockedGoroutine(t *testing.T, state, fName string, count int) {
 	re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName)
 	r := regexp.MustCompile(re)
 
@@ -1047,7 +1047,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
 			buf = make([]byte, 2*len(buf))
 			continue
 		}
-		if r.Match(buf[:n]) {
+		if len(r.FindAll(buf[:n], -1)) >= count {
 			return
 		}
 	}
@@ -1056,7 +1056,7 @@ func awaitBlockedGoroutine(t *testing.T, state, fName string) {
 func blockChanRecv(t *testing.T) {
 	c := make(chan bool)
 	go func() {
-		awaitBlockedGoroutine(t, "chan receive", "blockChanRecv")
+		awaitBlockedGoroutine(t, "chan receive", "blockChanRecv", 1)
 		c <- true
 	}()
 	<-c
@@ -1065,7 +1065,7 @@ func blockChanRecv(t *testing.T) {
 func blockChanSend(t *testing.T) {
 	c := make(chan bool)
 	go func() {
-		awaitBlockedGoroutine(t, "chan send", "blockChanSend")
+		awaitBlockedGoroutine(t, "chan send", "blockChanSend", 1)
 		<-c
 	}()
 	c <- true
@@ -1074,7 +1074,7 @@ func blockChanSend(t *testing.T) {
 func blockChanClose(t *testing.T) {
 	c := make(chan bool)
 	go func() {
-		awaitBlockedGoroutine(t, "chan receive", "blockChanClose")
+		awaitBlockedGoroutine(t, "chan receive", "blockChanClose", 1)
 		close(c)
 	}()
 	<-c
@@ -1086,7 +1086,7 @@ func blockSelectRecvAsync(t *testing.T) {
 	c2 := make(chan bool, 1)
 	go func() {
 		for i := 0; i < numTries; i++ {
-			awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync")
+			awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync", 1)
 			c <- true
 		}
 	}()
@@ -1102,7 +1102,7 @@ func blockSelectSendSync(t *testing.T) {
 	c := make(chan bool)
 	c2 := make(chan bool)
 	go func() {
-		awaitBlockedGoroutine(t, "select", "blockSelectSendSync")
+		awaitBlockedGoroutine(t, "select", "blockSelectSendSync", 1)
 		<-c
 	}()
 	select {
@@ -1115,7 +1115,7 @@ func blockMutex(t *testing.T) {
 	var mu sync.Mutex
 	mu.Lock()
 	go func() {
-		awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex")
+		awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", 1)
 		mu.Unlock()
 	}()
 	// Note: Unlock releases mu before recording the mutex event,
@@ -1125,12 +1125,36 @@ func blockMutex(t *testing.T) {
 	mu.Lock()
 }
 
+func blockMutexN(t *testing.T, n int, d time.Duration) {
+	var wg sync.WaitGroup
+	var mu sync.Mutex
+	mu.Lock()
+	go func() {
+		awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", n)
+		time.Sleep(d)
+		mu.Unlock()
+	}()
+	// Note: Unlock releases mu before recording the mutex event,
+	// so it's theoretically possible for this to proceed and
+	// capture the profile before the event is recorded. As long
+	// as this is blocked before the unlock happens, it's okay.
+	for i := 0; i < n; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			mu.Lock()
+			mu.Unlock()
+		}()
+	}
+	wg.Wait()
+}
+
 func blockCond(t *testing.T) {
 	var mu sync.Mutex
 	c := sync.NewCond(&mu)
 	mu.Lock()
 	go func() {
-		awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond")
+		awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond", 1)
 		mu.Lock()
 		c.Signal()
 		mu.Unlock()
@@ -1217,7 +1241,11 @@ func TestMutexProfile(t *testing.T) {
 		t.Fatalf("need MutexProfileRate 0, got %d", old)
 	}
 
-	blockMutex(t)
+	const (
+		N = 100
+		D = 100 * time.Millisecond
+	)
+	blockMutexN(t, N, D)
 
 	t.Run("debug=1", func(t *testing.T) {
 		var w strings.Builder
@@ -1230,15 +1258,11 @@ func TestMutexProfile(t *testing.T) {
 		}
 		prof = strings.Trim(prof, "\n")
 		lines := strings.Split(prof, "\n")
-		if len(lines) != 6 {
-			t.Errorf("expected 6 lines, got %d %q\n%s", len(lines), prof, prof)
-		}
 		if len(lines) < 6 {
-			return
+			t.Fatalf("expected >=6 lines, got %d %q\n%s", len(lines), prof, prof)
 		}
 		// checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
 		r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+`
-		//r2 := "^[0-9]+ 1 @ 0x[0-9a-f x]+$"
 		if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
 			t.Errorf("%q didn't match %q", lines[3], r2)
 		}
@@ -1263,12 +1287,30 @@ func TestMutexProfile(t *testing.T) {
 
 		stks := stacks(p)
 		for _, want := range [][]string{
-			{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutex.func1"},
+			{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexN.func1"},
 		} {
 			if !containsStack(stks, want) {
 				t.Errorf("No matching stack entry for %+v", want)
 			}
 		}
+
+		i := 0
+		for ; i < len(p.SampleType); i++ {
+			if p.SampleType[i].Unit == "nanoseconds" {
+				break
+			}
+		}
+		if i >= len(p.SampleType) {
+			t.Fatalf("profile did not contain nanoseconds sample")
+		}
+		total := int64(0)
+		for _, s := range p.Sample {
+			total += s.Value[i]
+		}
+		d := time.Duration(total)
+		if d < N*D*9/10 || d > N*D*2 { // want N*D but allow [0.9,2.0]*that.
+			t.Fatalf("profile samples total %v, want %v", d, N*D)
+		}
 	})
 }
author	Russ Cox <rsc@golang.org>	2023-06-26 17:12:44 -0400
committer	Gopher Robot <gobot@golang.org>	2023-08-17 01:30:31 +0000
commit	1c0035401358c8bfc2ff646b1d950da5fcd6b355 (patch)
tree	b446e570234778b0d89924fd70ab7a16b4542411 /src/runtime/pprof
parent	14a3ffc3d2165ef03c3ffd0037a4fa6dbb776026 (diff)
download	go-1c0035401358c8bfc2ff646b1d950da5fcd6b355.tar.xz