aboutsummaryrefslogtreecommitdiff
path: root/src/testing/benchmark_test.go
diff options
context:
space:
mode:
author thepudds <thepudds1460@gmail.com> 2026-01-23 15:59:40 -0500
committer Gopher Robot <gobot@golang.org> 2026-01-30 09:55:08 -0800
commit a56d064bd3e8ed3320175708c725e459ba31d5cd (patch)
tree 578dfb68f818b7365ac65fc7d61c76d46cbd3d3e /src/testing/benchmark_test.go
parent 76e4bd2a57df870ac65973ff0404e7f3cb329c2f (diff)
download go-a56d064bd3e8ed3320175708c725e459ba31d5cd.tar.xz
[release-branch.go1.26] cmd/compile/internal/bloop: avoid extraneous heap allocs in b.Loop body due to autotmp variable scope
The motivating example I created for #73137 still seems to heap allocate in go1.26rc2 when used in a b.Loop body.

                        │   go1.25   │           go1.26rc2           │
                        │ allocs/op  │ allocs/op   vs base           │
    NewX/b.Loop-basic-4   1.000 ± 0%   1.000 ± 0%  ~ (p=1.000 n=10) ¹

I suspect it is because the temps are by default declared outside the loop body, which escape analysis will determine is an escaping value and result in a heap allocation. (I've seen this problem before, including in my older CL 546023 that attempts to help PGO with a similar issue.)

This is an attempt to address that by placing ODCLs within the b.Loop body for the temps that are created so that they can be marked keepalive.

There are two cases handled in the CL: function return values and function arguments. The first case is what affects my example from #73137, and is also illustrated via the NewX test case in the new test/escape_bloop.go file.

Without this CL, the NewX call in the BenchmarkBloop test is inlined, which is an improvement over Go 1.25, but the slice still escapes because the temporary used for the return value is declared outside the loop body.

With this CL, the slice does not escape.

The second case is illustrated via the new BenchmarkBLoopFunctionArg test, which shows a function argument that escapes without this CL but does not escape with this CL.

We can also make the two new b.Loop tests in testing/benchmark_test.go individually pass or fail as expected based on individually reverting the two changes in this CL.

While we are here, we add a note to typecheck.TempAt to help make people aware of this behavior.
Updates #73137
Fixes #77339

Reviewed-on: https://go-review.googlesource.com/c/go/+/738822
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Change-Id: I80e89ca95ba297b0d95f02782e6f4ae901a4361a
Reviewed-on: https://go-review.googlesource.com/c/go/+/740600
Reviewed-by: Austin Clements <austin@google.com>
Auto-Submit: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Diffstat (limited to 'src/testing/benchmark_test.go')
-rw-r--r-- src/testing/benchmark_test.go 61
1 files changed, 61 insertions, 0 deletions
diff --git a/src/testing/benchmark_test.go b/src/testing/benchmark_test.go
index e2dd24c839..a21daf7d12 100644
--- a/src/testing/benchmark_test.go
+++ b/src/testing/benchmark_test.go
@@ -9,6 +9,10 @@ import (
"cmp"
"context"
"errors"
+ "internal/asan"
+ "internal/msan"
+ "internal/race"
+ "internal/testenv"
"runtime"
"slices"
"strings"
@@ -157,6 +161,63 @@ func TestBenchmarkContext(t *testing.T) {
})
}
+// Some auxiliary functions for measuring allocations in a b.Loop benchmark below,
+// where in this case mid-stack inlining allows stack allocation of a slice.
+// This is based on the example in go.dev/issue/73137.
+
+func newX() []byte { // newX makes an 8-byte slice and returns whatever use1 hands back for it.
+ out := make([]byte, 8) // the only make in this helper; the "call-result" benchmark below counts allocations around calls to newX
+ return use1(out)
+}
+
+//go:noinline
+func use1(out []byte) []byte { // use1 returns out unchanged; the go:noinline directive above asks the compiler not to inline it.
+ return out
+}
+
+// An auxiliary function for measuring allocations with a simple function argument
+// in the b.Loop body.
+
+//go:noinline
+func use2(x any) {} // use2 accepts any value and does nothing with it; the go:noinline directive above asks the compiler not to inline it.
+
+func TestBenchmarkBLoopAllocs(t *testing.T) { // TestBenchmarkBLoopAllocs runs two b.Loop benchmarks and expects each to report 0 allocs/op.
+ testenv.SkipIfOptimizationOff(t) // NOTE(review): by its name this skips when compiler optimizations are disabled — confirm in internal/testenv
+ if race.Enabled || asan.Enabled || msan.Enabled { // any of the three sanitizer builds skips the whole test
+ t.Skip("skipping in case sanitizers alter allocation behavior")
+ }
+
+ t.Run("call-result", func(t *testing.T) { // case 1: the loop body calls newX and discards its result
+ bRet := testing.Benchmark(func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ newX()
+ }
+ })
+ if bRet.N == 0 { // fail early if the benchmark reports no iterations
+ t.Fatalf("benchmark reported 0 iterations")
+ }
+ if bRet.AllocsPerOp() != 0 { // any nonzero allocs/op is reported as a test error
+ t.Errorf("want 0 allocs, got %d", bRet.AllocsPerOp())
+ }
+ })
+
+ t.Run("call-arg", func(t *testing.T) { // case 2: the loop body passes a fresh slice straight to use2
+ bRet := testing.Benchmark(func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ use2(make([]byte, 1000)) // 1000-byte slice passed as use2's `any` parameter
+ }
+ })
+ if bRet.N == 0 { // fail early if the benchmark reports no iterations
+ t.Fatalf("benchmark reported 0 iterations")
+ }
+ if bRet.AllocsPerOp() != 0 { // any nonzero allocs/op is reported as a test error
+ t.Errorf("want 0 allocs, got %d", bRet.AllocsPerOp())
+ }
+ })
+}
+
func ExampleB_RunParallel() {
// Parallel benchmark for text/template.Template.Execute on a single object.
testing.Benchmark(func(b *testing.B) {