| author | khr@golang.org <khr@golang.org> | 2025-09-12 14:43:19 -0700 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2025-11-20 09:19:39 -0800 |
| commit | 32f5aadd2ffc60421c62b185fa7668012fb5e73e (patch) | |
| tree | 98720009a4a90ef893980660e2152532fcaf609e /src/runtime/slice.go | |
| parent | a18aff805706bfdaeb9aca042111fae32f9f8b61 (diff) | |
| download | go-32f5aadd2ffc60421c62b185fa7668012fb5e73e.tar.xz | |
cmd/compile: stack allocate backing stores during append
We can already stack allocate the backing store during append if the
resulting backing store doesn't escape. See CL 664299.
This CL enables us to often stack allocate the backing store during
append *even if* the result escapes. Typically, for code like:
	func f(n int) []int {
		var r []int
		for i := range n {
			r = append(r, i)
		}
		return r
	}
the backing store for r escapes, but only by returning it.
Could we operate with r on the stack for most of its lifetime,
and only move it to the heap at the return point?
The current implementation of append must allocate each time it
calls growslice, which happens on the 1st, 2nd, 4th, 8th, etc.
append calls. The allocations done by all but the last growslice
call immediately become garbage.
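For a rough sense of that cost, here is a minimal measurement sketch
(assuming Go 1.22+ for range-over-int; the exact count depends on the
Go version and malloc size classes):

	package main

	import (
		"fmt"
		"testing"
	)

	func f(n int) []int {
		var r []int
		for i := range n {
			r = append(r, i)
		}
		return r
	}

	func main() {
		// Without this optimization, growslice allocates at lengths
		// 1, 2, 4, 8, ..., so f(100) performs several allocations and
		// all but the final backing store is immediately garbage.
		allocs := testing.AllocsPerRun(100, func() { _ = f(100) })
		fmt.Println("allocs per call:", allocs)
	}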
We'd like to avoid doing some of those intermediate allocations
if possible. We rewrite the above code by introducing a move2heap
operation:
	func f(n int) []int {
		var r []int
		for i := range n {
			r = append(r, i)
		}
		r = move2heap(r)
		return r
	}
The move2heap runtime operation does the following:

	move2heap(r):
		If r is already backed by heap storage, return r.
		Otherwise, copy r to the heap and return the copy.
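At the source level, that contract can be sketched in ordinary Go.
The onStack helper below is a hypothetical placeholder (the real
runtime checks the slice's data pointer against the goroutine's stack
bounds, via the moveSlice* helpers in the diff below):

	// Sketch of move2heap's contract, not the runtime implementation.
	func move2heap[T any](r []T) []T {
		if !onStack(r) {
			return r // already heap-backed; nothing to do
		}
		heap := make([]T, len(r), cap(r)) // heap-allocated backing store
		copy(heap, r)
		return heap
	}

	// onStack is a hypothetical placeholder. Returning true makes the
	// sketch conservatively copy every time, which is always correct.
	func onStack[T any](r []T) bool { return true }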
Now we can treat the backing store of r allocated at the
append site as not escaping. The existing stack allocation
optimizations then apply, letting us use a fixed-size
stack-allocated backing store for r while appending.
See the description in cmd/compile/internal/slice/slice.go
for how we ensure that this optimization is safe.
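For intuition, a hand-written, source-level analogue of the transformed
code looks like this. It is a sketch only: the buffer size 16 is an
arbitrary assumption, and in real Go escape analysis would likely
heap-allocate buf anyway; the compiler performs the actual
transformation below the source level, where it can prove more:

	func fManual(n int) []int {
		var buf [16]int // plays the role of the fixed-size stack buffer
		r := buf[:0]
		for i := range n {
			r = append(r, i) // spills to a heap backing store once buf fills
		}
		// Analogue of move2heap: copy exactly once if r still aliases buf.
		if len(r) > 0 && &r[0] == &buf[0] {
			heap := make([]int, len(r))
			copy(heap, r)
			r = heap
		}
		return r
	}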
Change-Id: I81f36e58bade2241d07f67967d8d547fff5302b8
Reviewed-on: https://go-review.googlesource.com/c/go/+/707755
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/runtime/slice.go')
| -rw-r--r-- | src/runtime/slice.go | 104 |
1 file changed, 104 insertions, 0 deletions
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index e31d5dccb2..a9e8fc1610 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte {
 	cap := roundupsize(uintptr(len), true)
 	return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len]
 }
+
+// moveSlice copies the input slice to the heap and returns it.
+// et is the element type of the slice.
+func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+	if cap == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	capmem := uintptr(cap) * et.Size_
+	new := mallocgc(capmem, et, true)
+	bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et)
+	memmove(new, old, capmem)
+	return new, len, cap
+}
+
+// moveSliceNoScan is like moveSlice except the element type is known to
+// not have any pointers. We instead pass in the size of the element.
+func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+	if cap == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	capmem := uintptr(cap) * elemSize
+	new := mallocgc(capmem, nil, false)
+	memmove(new, old, capmem)
+	return new, len, cap
+}
+
+// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity
+// for the returned slice.
+// Elements between len and cap in the returned slice will be zeroed.
+func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+	if len == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	lenmem := uintptr(len) * et.Size_
+	capmem := roundupsize(lenmem, false)
+	new := mallocgc(capmem, et, true)
+	bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et)
+	memmove(new, old, lenmem)
+	return new, len, int(capmem / et.Size_)
+}
+
+// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap.
+func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+	if len == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	lenmem := uintptr(len) * elemSize
+	capmem := roundupsize(lenmem, true)
+	new := mallocgc(capmem, nil, false)
+	memmove(new, old, lenmem)
+	if capmem > lenmem {
+		memclrNoHeapPointers(add(new, lenmem), capmem-lenmem)
+	}
+	return new, len, int(capmem / elemSize)
+}
+
+// growsliceBuf is like growslice, but we can use the given buffer
+// as a backing store if we want. bufPtr must be on the stack.
+func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice {
+	if newLen > bufLen {
+		// Doesn't fit, process like a normal growslice.
+		return growslice(oldPtr, newLen, oldCap, num, et)
+	}
+	oldLen := newLen - num
+	if oldPtr != bufPtr && oldLen != 0 {
+		// Move data to start of buffer.
+		// Note: bufPtr is on the stack, so no write barrier needed.
+		memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_)
+	}
+	// Pick a new capacity.
+	//
+	// Unlike growslice, we don't need to double the size each time.
+	// The work done here is not proportional to the length of the slice.
+	// (Unless the memmove happens above, but that is rare, and in any
+	// case there are not many elements on this path.)
+	//
+	// Instead, we try to just bump up to the next size class.
+	// This will ensure that we don't waste any space when we eventually
+	// call moveSlice with the resulting slice.
+	newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_)
+
+	// Zero slice beyond newLen.
+	// The buffer is stack memory, so NoHeapPointers is ok.
+	// Caller will overwrite [oldLen:newLen], so we don't need to zero that portion.
+	// If et.Pointers(), buffer is at least initialized so we don't need to
+	// worry about the caller overwriting junk in [oldLen:newLen].
+	if newLen < newCap {
+		memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_)
+	}
+
+	return slice{bufPtr, newLen, newCap}
+}
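To illustrate the size-class bumping that growsliceBuf's comment
describes, here is a standalone sketch. sizeClasses is a truncated copy
of the runtime's small-object size classes (assumed, and subject to
change), and roundupsize is a simplified stand-in for the runtime
function (the real one also takes a noscan flag):

	package main

	import "fmt"

	// First few small-object size classes, in bytes.
	var sizeClasses = []uintptr{8, 16, 24, 32, 48, 64, 80, 96, 112, 128}

	// roundupsize rounds n up to the next size class.
	func roundupsize(n uintptr) uintptr {
		for _, c := range sizeClasses {
			if n <= c {
				return c
			}
		}
		return n // larger objects are page-rounded in the real runtime
	}

	func main() {
		const elemSize = 8 // e.g. []int on 64-bit
		// Capacity bumps to the next size class instead of doubling,
		// so no space is wasted when moveSlice later copies the result.
		for _, newLen := range []int{1, 3, 5, 9, 13} {
			newCap := roundupsize(uintptr(newLen)*elemSize) / elemSize
			fmt.Printf("newLen=%d -> newCap=%d\n", newLen, newCap)
		}
	}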
