From 2ae2a94857cb17a98a86a8332d6f76863982bf59 Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 16 Sep 2020 16:22:28 +0000
Subject: runtime: fix leak and locking in BenchmarkMSpanCountAlloc

CL 249917 made the mspan in MSpanCountAlloc no longer stack-allocated
(for good reason), but then allocated an mspan on each call and did not
free it, resulting in a leak. That allocation was also not protected by
the heap lock, which could lead to data corruption of mheap fields and
the spanalloc.

To fix this, export some functions to allocate/free dummy mspans from
spanalloc (with proper locking) and allocate just one up-front for the
benchmark, freeing it at the end. Then, update MSpanCountAlloc to accept
a dummy mspan.

Note that we need to allocate the dummy mspan up-front otherwise we
measure things like heap locking and fixalloc performance instead of
what we actually want to measure: how fast we can do a popcount on the
mark bits.

Fixes #41391.

Change-Id: If6629a6ec1ece639c7fb78532045837a8c872c04
Reviewed-on: https://go-review.googlesource.com/c/go/+/255297
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Michael Knyszek <mknyszek@google.com>
---
 src/runtime/gc_test.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/runtime/gc_test.go')

diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index c5c8a4cecf..9edebdada6 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -763,6 +763,10 @@ func BenchmarkScanStackNoLocals(b *testing.B) {
 }
 
 func BenchmarkMSpanCountAlloc(b *testing.B) {
+	// Allocate one dummy mspan for the whole benchmark.
+	s := runtime.AllocMSpan()
+	defer runtime.FreeMSpan(s)
+
 	// n is the number of bytes to benchmark against.
 	// n must always be a multiple of 8, since gcBits is
 	// always rounded up 8 bytes.
@@ -774,7 +778,7 @@ func BenchmarkMSpanCountAlloc(b *testing.B) {
 
 			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
-				runtime.MSpanCountAlloc(bits)
+				runtime.MSpanCountAlloc(s, bits)
 			}
 		})
 	}
-- 
cgit v1.3


From 74e566ed1dc52f7ef58093aff936a0931537a1ad Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Wed, 5 Aug 2020 23:10:46 +0000
Subject: runtime: add readMetrics latency benchmark

This change adds a new benchmark to the runtime tests for measuring the
latency of the new metrics implementation, based on the
ReadMemStats latency benchmark. readMetrics will have more metrics added
to it in the future, and this benchmark will serve as a way to measure
the cost of adding additional metrics.

Change-Id: Ib05e3ed4afa49a70863fc0c418eab35b72263e24
Reviewed-on: https://go-review.googlesource.com/c/go/+/247042
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
---
 src/runtime/gc_test.go      | 17 ++++++++++++-----
 src/runtime/metrics_test.go | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

(limited to 'src/runtime/gc_test.go')

diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 9edebdada6..7870f31ae9 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -518,7 +518,7 @@ func BenchmarkReadMemStats(b *testing.B) {
 	hugeSink = nil
 }
 
-func BenchmarkReadMemStatsLatency(b *testing.B) {
+func applyGCLoad(b *testing.B) func() {
 	// We’ll apply load to the runtime with maxProcs-1 goroutines
 	// and use one more to actually benchmark. It doesn't make sense
 	// to try to run this test with only 1 P (that's what
@@ -563,6 +563,14 @@ func BenchmarkReadMemStatsLatency(b *testing.B) {
 			runtime.KeepAlive(hold)
 		}()
 	}
+	return func() {
+		close(done)
+		wg.Wait()
+	}
+}
+
+func BenchmarkReadMemStatsLatency(b *testing.B) {
+	stop := applyGCLoad(b)
 
 	// Spend this much time measuring latencies.
 	latencies := make([]time.Duration, 0, 1024)
@@ -579,12 +587,11 @@ func BenchmarkReadMemStatsLatency(b *testing.B) {
 		runtime.ReadMemStats(&ms)
 		latencies = append(latencies, time.Now().Sub(start))
 	}
-	close(done)
-	// Make sure to stop the timer before we wait! The goroutines above
-	// are very heavy-weight and not easy to stop, so we could end up
+	// Make sure to stop the timer before we wait! The load created above
+	// is very heavy-weight and not easy to stop, so we could end up
 	// confusing the benchmarking framework for small b.N.
 	b.StopTimer()
-	wg.Wait()
+	stop()
 
 	// Disable the default */op metrics.
 	// ns/op doesn't mean anything because it's an average, but we
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index f00aad07c4..d925b057b0 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -7,8 +7,10 @@ package runtime_test
 import (
 	"runtime"
 	"runtime/metrics"
+	"sort"
 	"strings"
 	"testing"
+	"time"
 	"unsafe"
 )
 
@@ -112,3 +114,39 @@ func TestReadMetricsConsistency(t *testing.T) {
 		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
 	}
 }
+
+func BenchmarkReadMetricsLatency(b *testing.B) {
+	stop := applyGCLoad(b)
+
+	// Spend this much time measuring latencies.
+	latencies := make([]time.Duration, 0, 1024)
+	_, samples := prepareAllMetricsSamples()
+
+	// Hit metrics.Read continuously and measure.
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		start := time.Now()
+		metrics.Read(samples)
+		latencies = append(latencies, time.Now().Sub(start))
+	}
+	// Make sure to stop the timer before we wait! The load created above
+	// is very heavy-weight and not easy to stop, so we could end up
+	// confusing the benchmarking framework for small b.N.
+	b.StopTimer()
+	stop()
+
+	// Disable the default */op metrics.
+	// ns/op doesn't mean anything because it's an average, but we
+	// have a sleep in our b.N loop above which skews this significantly.
+	b.ReportMetric(0, "ns/op")
+	b.ReportMetric(0, "B/op")
+	b.ReportMetric(0, "allocs/op")
+
+	// Sort latencies then report percentiles.
+	sort.Slice(latencies, func(i, j int) bool {
+		return latencies[i] < latencies[j]
+	})
+	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
+}
-- 
cgit v1.3