2 files changed, 423 insertions, 0 deletions
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index e31d5dccb2..a9e8fc1610 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte {
 	cap := roundupsize(uintptr(len), true)
 	return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len]
 }
+
+// moveSlice copies the input slice to the heap and returns it.
+// et is the element type of the slice.
+func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+	if cap == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	capmem := uintptr(cap) * et.Size_
+	new := mallocgc(capmem, et, true)
+	bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et)
+	memmove(new, old, capmem)
+	return new, len, cap
+}
+
+// moveSliceNoScan is like moveSlice except the element type is known to
+// not have any pointers. We instead pass in the size of the element.
+func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) {
+	if cap == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	capmem := uintptr(cap) * elemSize
+	new := mallocgc(capmem, nil, false)
+	memmove(new, old, capmem)
+	return new, len, cap
+}
+
+// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity
+// for the returned slice.
+// Elements between len and cap in the returned slice will be zeroed.
+func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+	if len == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	lenmem := uintptr(len) * et.Size_
+	capmem := roundupsize(lenmem, false)
+	new := mallocgc(capmem, et, true)
+	bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et)
+	memmove(new, old, lenmem)
+	return new, len, int(capmem / et.Size_)
+}
+
+// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap.
+func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) {
+	if len == 0 {
+		if old != nil {
+			old = unsafe.Pointer(&zerobase)
+		}
+		return old, 0, 0
+	}
+	lenmem := uintptr(len) * elemSize
+	capmem := roundupsize(lenmem, true)
+	new := mallocgc(capmem, nil, false)
+	memmove(new, old, lenmem)
+	if capmem > lenmem {
+		memclrNoHeapPointers(add(new, lenmem), capmem-lenmem)
+	}
+	return new, len, int(capmem / elemSize)
+}
+
+// growsliceBuf is like growslice, but we can use the given buffer
+// as a backing store if we want. bufPtr must be on the stack.
+func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice {
+	if newLen > bufLen {
+		// Doesn't fit, process like a normal growslice.
+		return growslice(oldPtr, newLen, oldCap, num, et)
+	}
+	oldLen := newLen - num
+	if oldPtr != bufPtr && oldLen != 0 {
+		// Move data to start of buffer.
+		// Note: bufPtr is on the stack, so no write barrier needed.
+		memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_)
+	}
+	// Pick a new capacity.
+	//
+	// Unlike growslice, we don't need to double the size each time.
+	// The work done here is not proportional to the length of the slice.
+	// (Unless the memmove happens above, but that is rare, and in any
+	// case there are not many elements on this path.)
+	//
+	// Instead, we try to just bump up to the next size class.
+	// This will ensure that we don't waste any space when we eventually
+	// call moveSlice with the resulting slice.
+	newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_)
+
+	// Zero slice beyond newLen.
+	// The buffer is stack memory, so NoHeapPointers is ok.
+	// Caller will overwrite [oldLen:newLen], so we don't need to zero that portion.
+	// If et.Pointers(), buffer is at least initialized so we don't need to
+	// worry about the caller overwriting junk in [oldLen:newLen].
+	if newLen < newCap {
+		memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_)
+	}
+
+	return slice{bufPtr, newLen, newCap}
+}
diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go
index cd2bc26d1e..5463b6c02f 100644
--- a/src/runtime/slice_test.go
+++ b/src/runtime/slice_test.go
@@ -6,6 +6,9 @@ package runtime_test
 
 import (
 	"fmt"
+	"internal/race"
+	"internal/testenv"
+	"runtime"
 	"testing"
 )
 
@@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) {
 
 	})
 }
+
+//go:noinline
+func byteSlice(n int) []byte {
+	var r []byte
+	for i := range n {
+		r = append(r, byte(i))
+	}
+	return r
+}
+func TestAppendByteInLoop(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+	if race.Enabled {
+		t.Skip("skipping in -race mode")
+	}
+	for _, test := range [][3]int{
+		{0, 0, 0},
+		{1, 1, 8},
+		{2, 1, 8},
+		{8, 1, 8},
+		{9, 1, 16},
+		{16, 1, 16},
+		{17, 1, 24},
+		{24, 1, 24},
+		{25, 1, 32},
+		{32, 1, 32},
+		{33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48.
+		{48, 1, 64},
+		{49, 1, 64},
+		{64, 1, 64},
+		{65, 2, 128},
+	} {
+		n := test[0]
+		want := test[1]
+		wantCap := test[2]
+		var r []byte
+		got := testing.AllocsPerRun(10, func() {
+			r = byteSlice(n)
+		})
+		if got != float64(want) {
+			t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+		}
+		if cap(r) != wantCap {
+			t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+		}
+	}
+}
+
+//go:noinline
+func ptrSlice(n int, p *[]*byte) {
+	var r []*byte
+	for range n {
+		r = append(r, nil)
+	}
+	*p = r
+}
+func TestAppendPtrInLoop(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+	if race.Enabled {
+		t.Skip("skipping in -race mode")
+	}
+	var tests [][3]int
+	if runtime.PtrSize == 8 {
+		tests = [][3]int{
+			{0, 0, 0},
+			{1, 1, 1},
+			{2, 1, 2},
+			{3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32.
+			{4, 1, 4},
+			{5, 1, 8},
+			{6, 1, 8},
+			{7, 1, 8},
+			{8, 1, 8},
+			{9, 2, 16},
+		}
+	} else {
+		tests = [][3]int{
+			{0, 0, 0},
+			{1, 1, 2},
+			{2, 1, 2},
+			{3, 1, 4},
+			{4, 1, 4},
+			{5, 1, 6}, // These two are also 24 bytes instead of 32.
+			{6, 1, 6}, //
+			{7, 1, 8},
+			{8, 1, 8},
+			{9, 1, 16},
+			{10, 1, 16},
+			{11, 1, 16},
+			{12, 1, 16},
+			{13, 1, 16},
+			{14, 1, 16},
+			{15, 1, 16},
+			{16, 1, 16},
+			{17, 2, 32},
+		}
+	}
+	for _, test := range tests {
+		n := test[0]
+		want := test[1]
+		wantCap := test[2]
+		var r []*byte
+		got := testing.AllocsPerRun(10, func() {
+			ptrSlice(n, &r)
+		})
+		if got != float64(want) {
+			t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+		}
+		if cap(r) != wantCap {
+			t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+		}
+	}
+}
+
+//go:noinline
+func byteCapSlice(n int) ([]byte, int) {
+	var r []byte
+	for i := range n {
+		r = append(r, byte(i))
+	}
+	return r, cap(r)
+}
+func TestAppendByteCapInLoop(t *testing.T) {
+	testenv.SkipIfOptimizationOff(t)
+	if race.Enabled {
+		t.Skip("skipping in -race mode")
+	}
+	for _, test := range [][3]int{
+		{0, 0, 0},
+		{1, 1, 8},
+		{2, 1, 8},
+		{8, 1, 8},
+		{9, 1, 16},
+		{16, 1, 16},
+		{17, 1, 24},
+		{24, 1, 24},
+		{25, 1, 32},
+		{32, 1, 32},
+		{33, 1, 64},
+		{48, 1, 64},
+		{49, 1, 64},
+		{64, 1, 64},
+		{65, 2, 128},
+	} {
+		n := test[0]
+		want := test[1]
+		wantCap := test[2]
+		var r []byte
+		got := testing.AllocsPerRun(10, func() {
+			r, _ = byteCapSlice(n)
+		})
+		if got != float64(want) {
+			t.Errorf("for size %d, got %f allocs want %d", n, got, want)
+		}
+		if cap(r) != wantCap {
+			t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap)
+		}
+	}
+}
+
+func TestAppendGeneric(t *testing.T) {
+	type I *int
+	r := testAppendGeneric[I](100)
+	if len(r) != 100 {
+		t.Errorf("bad length")
+	}
+}
+
+//go:noinline
+func testAppendGeneric[E any](n int) []E {
+	var r []E
+	var z E
+	for range n {
+		r = append(r, z)
+	}
+	return r
+}
+
+func appendSomeBytes(r []byte, s []byte) []byte {
+	for _, b := range s {
+		r = append(r, b)
+	}
+	return r
+}
+
+func TestAppendOfArg(t *testing.T) {
+	r := make([]byte, 24)
+	for i := 0; i < 24; i++ {
+		r[i] = byte(i)
+	}
+	appendSomeBytes(r, []byte{25, 26, 27})
+	// Do the same thing, trying to overwrite any
+	// stack-allocated buffers used above.
+	s := make([]byte, 24)
+	for i := 0; i < 24; i++ {
+		s[i] = 99
+	}
+	appendSomeBytes(s, []byte{99, 99, 99})
+	// Check that we still have the right data.
+	for i, b := range r {
+		if b != byte(i) {
+			t.Errorf("r[%d]=%d, want %d", i, b, byte(i))
+		}
+	}
+
+}
+
+func BenchmarkAppendInLoop(b *testing.B) {
+	for _, size := range []int{0, 1, 8, 16, 32, 64, 128} {
+		b.Run(fmt.Sprintf("%d", size),
+			func(b *testing.B) {
+				b.ReportAllocs()
+				for b.Loop() {
+					byteSlice(size)
+				}
+			})
+	}
+}
+
+func TestMoveToHeapEarly(t *testing.T) {
+	// Just checking that this compiles.
+	var x []int
+	y := x // causes a move2heap in the entry block
+	for range 5 {
+		x = append(x, 5)
+	}
+	_ = y
+}
+
+func TestMoveToHeapCap(t *testing.T) {
+	var c int
+	r := func() []byte {
+		var s []byte
+		for i := range 10 {
+			s = append(s, byte(i))
+		}
+		c = cap(s)
+		return s
+	}()
+	if c != cap(r) {
+		t.Errorf("got cap=%d, want %d", c, cap(r))
+	}
+	sinkSlice = r
+}
+
+//go:noinline
+func runit(f func()) {
+	f()
+}
+
+func TestMoveToHeapClosure1(t *testing.T) {
+	var c int
+	r := func() []byte {
+		var s []byte
+		for i := range 10 {
+			s = append(s, byte(i))
+		}
+		runit(func() {
+			c = cap(s)
+		})
+		return s
+	}()
+	if c != cap(r) {
+		t.Errorf("got cap=%d, want %d", c, cap(r))
+	}
+	sinkSlice = r
+}
+func TestMoveToHeapClosure2(t *testing.T) {
+	var c int
+	r := func() []byte {
+		var s []byte
+		for i := range 10 {
+			s = append(s, byte(i))
+		}
+		c = func() int {
+			return cap(s)
+		}()
+		return s
+	}()
+	if c != cap(r) {
+		t.Errorf("got cap=%d, want %d", c, cap(r))
+	}
+	sinkSlice = r
+}
+
+//go:noinline
+func buildClosure(t *testing.T) ([]byte, func()) {
+	var s []byte
+	for i := range 20 {
+		s = append(s, byte(i))
+	}
+	c := func() {
+		for i, b := range s {
+			if b != byte(i) {
+				t.Errorf("s[%d]=%d, want %d", i, b, i)
+			}
+		}
+	}
+	return s, c
+}
+
+func TestMoveToHeapClosure3(t *testing.T) {
+	_, f := buildClosure(t)
+	overwriteStack(0)
+	f()
+}
+
+//go:noinline
+func overwriteStack(n int) uint64 {
+	var x [100]uint64
+	for i := range x {
+		x[i] = 0xabcdabcdabcdabcd
+	}
+	return x[n]
+}
+
+var sinkSlice []byte