diff options
| author | khr@golang.org <khr@golang.org> | 2025-09-12 14:43:19 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2025-11-20 09:19:39 -0800 |
| commit | 32f5aadd2ffc60421c62b185fa7668012fb5e73e (patch) | |
| tree | 98720009a4a90ef893980660e2152532fcaf609e /src/runtime/slice_test.go | |
| parent | a18aff805706bfdaeb9aca042111fae32f9f8b61 (diff) | |
| download | go-32f5aadd2ffc60421c62b185fa7668012fb5e73e.tar.xz | |
cmd/compile: stack allocate backing stores during append
We can already stack allocate the backing store during append if the
resulting backing store doesn't escape. See CL 664299.
This CL enables us to often stack allocate the backing store during
append *even if* the result escapes. Typically, for code like:
func f(n int) []int {
var r []int
for i := range n {
r = append(r, i)
}
return r
}
the backing store for r escapes, but only by returning it.
Could we operate with r on the stack for most of its lifeime,
and only move it to the heap at the return point?
The current implementation of append will need to do an allocation
each time it calls growslice. This will happen on the 1st, 2nd, 4th,
8th, etc. append calls. The allocations done by all but the
last growslice call will then immediately be garbage.
We'd like to avoid doing some of those intermediate allocations
if possible. We rewrite the above code by introducing a move2heap
operation:
func f(n int) []int {
var r []int
for i := range n {
r = append(r, i)
}
r = move2heap(r)
return r
}
Using the move2heap runtime function, which does:
move2heap(r):
If r is already backed by heap storage, return r.
Otherwise, copy r to the heap and return the copy.
Now we can treat the backing store of r allocated at the
append site as not escaping. Previous stack allocation
optimizations now apply, which can use a fixed-size
stack-allocated backing store for r when appending.
See the description in cmd/compile/internal/slice/slice.go
for how we ensure that this optimization is safe.
Change-Id: I81f36e58bade2241d07f67967d8d547fff5302b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/707755
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/slice_test.go')
| -rw-r--r-- | src/runtime/slice_test.go | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index cd2bc26d1e..5463b6c02f 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -6,6 +6,9 @@ package runtime_test import ( "fmt" + "internal/race" + "internal/testenv" + "runtime" "testing" ) @@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) { }) } + +//go:noinline +func byteSlice(n int) []byte { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r +} +func TestAppendByteInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48. + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r = byteSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func ptrSlice(n int, p *[]*byte) { + var r []*byte + for range n { + r = append(r, nil) + } + *p = r +} +func TestAppendPtrInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + var tests [][3]int + if runtime.PtrSize == 8 { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 1}, + {2, 1, 2}, + {3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32. + {4, 1, 4}, + {5, 1, 8}, + {6, 1, 8}, + {7, 1, 8}, + {8, 1, 8}, + {9, 2, 16}, + } + } else { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 2}, + {2, 1, 2}, + {3, 1, 4}, + {4, 1, 4}, + {5, 1, 6}, // These two are also 24 bytes instead of 32. + {6, 1, 6}, // + {7, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {10, 1, 16}, + {11, 1, 16}, + {12, 1, 16}, + {13, 1, 16}, + {14, 1, 16}, + {15, 1, 16}, + {16, 1, 16}, + {17, 2, 32}, + } + } + for _, test := range tests { + n := test[0] + want := test[1] + wantCap := test[2] + var r []*byte + got := testing.AllocsPerRun(10, func() { + ptrSlice(n, &r) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func byteCapSlice(n int) ([]byte, int) { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r, cap(r) +} +func TestAppendByteCapInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r, _ = byteCapSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +func TestAppendGeneric(t *testing.T) { + type I *int + r := testAppendGeneric[I](100) + if len(r) != 100 { + t.Errorf("bad length") + } +} + +//go:noinline +func testAppendGeneric[E any](n int) []E { + var r []E + var z E + for range n { + r = append(r, z) + } + return r +} + +func appendSomeBytes(r []byte, s []byte) []byte { + for _, b := range s { + r = append(r, b) + } + return r +} + +func TestAppendOfArg(t *testing.T) { + r := make([]byte, 24) + for i := 0; i < 24; i++ { + r[i] = byte(i) + } + appendSomeBytes(r, []byte{25, 26, 27}) + // Do the same thing, trying to overwrite any + // stack-allocated buffers used above. + s := make([]byte, 24) + for i := 0; i < 24; i++ { + s[i] = 99 + } + appendSomeBytes(s, []byte{99, 99, 99}) + // Check that we still have the right data. + for i, b := range r { + if b != byte(i) { + t.Errorf("r[%d]=%d, want %d", i, b, byte(i)) + } + } + +} + +func BenchmarkAppendInLoop(b *testing.B) { + for _, size := range []int{0, 1, 8, 16, 32, 64, 128} { + b.Run(fmt.Sprintf("%d", size), + func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + byteSlice(size) + } + }) + } +} + +func TestMoveToHeapEarly(t *testing.T) { + // Just checking that this compiles. + var x []int + y := x // causes a move2heap in the entry block + for range 5 { + x = append(x, 5) + } + _ = y +} + +func TestMoveToHeapCap(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = cap(s) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func runit(f func()) { + f() +} + +func TestMoveToHeapClosure1(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + runit(func() { + c = cap(s) + }) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} +func TestMoveToHeapClosure2(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = func() int { + return cap(s) + }() + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func buildClosure(t *testing.T) ([]byte, func()) { + var s []byte + for i := range 20 { + s = append(s, byte(i)) + } + c := func() { + for i, b := range s { + if b != byte(i) { + t.Errorf("s[%d]=%d, want %d", i, b, i) + } + } + } + return s, c +} + +func TestMoveToHeapClosure3(t *testing.T) { + _, f := buildClosure(t) + overwriteStack(0) + f() +} + +//go:noinline +func overwriteStack(n int) uint64 { + var x [100]uint64 + for i := range x { + x[i] = 0xabcdabcdabcdabcd + } + return x[n] +} + +var sinkSlice []byte |
