From 2ae2a94857cb17a98a86a8332d6f76863982bf59 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 16 Sep 2020 16:22:28 +0000 Subject: runtime: fix leak and locking in BenchmarkMSpanCountAlloc CL 249917 made the mspan in MSpanCountAlloc no longer stack-allocated (for good reason), but then allocated an mspan on each call and did not free it, resulting in a leak. That allocation was also not protected by the heap lock, which could lead to data corruption of mheap fields and the spanalloc. To fix this, export some functions to allocate/free dummy mspans from spanalloc (with proper locking) and allocate just one up-front for the benchmark, freeing it at the end. Then, update MSpanCountAlloc to accept a dummy mspan. Note that we need to allocate the dummy mspan up-front otherwise we measure things like heap locking and fixalloc performance instead of what we actually want to measure: how fast we can do a popcount on the mark bits. Fixes #41391. Change-Id: If6629a6ec1ece639c7fb78532045837a8c872c04 Reviewed-on: https://go-review.googlesource.com/c/go/+/255297 Run-TryBot: Michael Knyszek Reviewed-by: Keith Randall TryBot-Result: Go Bot Trust: Michael Knyszek --- src/runtime/gc_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/runtime/gc_test.go') diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index c5c8a4cecf..9edebdada6 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -763,6 +763,10 @@ func BenchmarkScanStackNoLocals(b *testing.B) { } func BenchmarkMSpanCountAlloc(b *testing.B) { + // Allocate one dummy mspan for the whole benchmark. + s := runtime.AllocMSpan() + defer runtime.FreeMSpan(s) + // n is the number of bytes to benchmark against. // n must always be a multiple of 8, since gcBits is // always rounded up 8 bytes. @@ -774,7 +778,7 @@ func BenchmarkMSpanCountAlloc(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - runtime.MSpanCountAlloc(bits) + runtime.MSpanCountAlloc(s, bits) } }) } -- cgit v1.3 From 74e566ed1dc52f7ef58093aff936a0931537a1ad Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 5 Aug 2020 23:10:46 +0000 Subject: runtime: add readMetrics latency benchmark This change adds a new benchmark to the runtime tests for measuring the latency of the new metrics implementation, based on the ReadMemStats latency benchmark. readMetrics will have more metrics added to it in the future, and this benchmark will serve as a way to measure the cost of adding additional metrics. Change-Id: Ib05e3ed4afa49a70863fc0c418eab35b72263e24 Reviewed-on: https://go-review.googlesource.com/c/go/+/247042 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Emmanuel Odeke Reviewed-by: Michael Pratt --- src/runtime/gc_test.go | 17 ++++++++++++----- src/runtime/metrics_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) (limited to 'src/runtime/gc_test.go') diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go index 9edebdada6..7870f31ae9 100644 --- a/src/runtime/gc_test.go +++ b/src/runtime/gc_test.go @@ -518,7 +518,7 @@ func BenchmarkReadMemStats(b *testing.B) { hugeSink = nil } -func BenchmarkReadMemStatsLatency(b *testing.B) { +func applyGCLoad(b *testing.B) func() { // We’ll apply load to the runtime with maxProcs-1 goroutines // and use one more to actually benchmark. It doesn't make sense // to try to run this test with only 1 P (that's what @@ -563,6 +563,14 @@ func BenchmarkReadMemStatsLatency(b *testing.B) { runtime.KeepAlive(hold) }() } + return func() { + close(done) + wg.Wait() + } +} + +func BenchmarkReadMemStatsLatency(b *testing.B) { + stop := applyGCLoad(b) // Spend this much time measuring latencies. latencies := make([]time.Duration, 0, 1024) @@ -579,12 +587,11 @@ func BenchmarkReadMemStatsLatency(b *testing.B) { runtime.ReadMemStats(&ms) latencies = append(latencies, time.Now().Sub(start)) } - close(done) - // Make sure to stop the timer before we wait! The goroutines above - // are very heavy-weight and not easy to stop, so we could end up + // Make sure to stop the timer before we wait! The load created above + // is very heavy-weight and not easy to stop, so we could end up // confusing the benchmarking framework for small b.N. b.StopTimer() - wg.Wait() + stop() // Disable the default */op metrics. // ns/op doesn't mean anything because it's an average, but we diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go index f00aad07c4..d925b057b0 100644 --- a/src/runtime/metrics_test.go +++ b/src/runtime/metrics_test.go @@ -7,8 +7,10 @@ package runtime_test import ( "runtime" "runtime/metrics" + "sort" "strings" "testing" + "time" "unsafe" ) @@ -112,3 +114,39 @@ func TestReadMetricsConsistency(t *testing.T) { t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want) } } + +func BenchmarkReadMetricsLatency(b *testing.B) { + stop := applyGCLoad(b) + + // Spend this much time measuring latencies. + latencies := make([]time.Duration, 0, 1024) + _, samples := prepareAllMetricsSamples() + + // Hit metrics.Read continuously and measure. + b.ResetTimer() + for i := 0; i < b.N; i++ { + start := time.Now() + metrics.Read(samples) + latencies = append(latencies, time.Now().Sub(start)) + } + // Make sure to stop the timer before we wait! The load created above + // is very heavy-weight and not easy to stop, so we could end up + // confusing the benchmarking framework for small b.N. + b.StopTimer() + stop() + + // Disable the default */op metrics. + // ns/op doesn't mean anything because it's an average, but we + // have a sleep in our b.N loop above which skews this significantly. + b.ReportMetric(0, "ns/op") + b.ReportMetric(0, "B/op") + b.ReportMetric(0, "allocs/op") + + // Sort latencies then report percentiles. + sort.Slice(latencies, func(i, j int) bool { + return latencies[i] < latencies[j] + }) + b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns") + b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns") + b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns") +} -- cgit v1.3