aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal
diff options
context:
space:
mode:
authorthepudds <thepudds1460@gmail.com>2025-11-25 09:57:50 -0500
committert hepudds <thepudds1460@gmail.com>2025-11-26 19:04:05 -0800
commit4879151d1dc9f951e4598bd433cd2142976ed39d (patch)
tree14570bf79b48bd12cb799ff71454ea94987fcf0b /src/cmd/compile/internal
parentd8269ab0d59212fed0f5975f7083f6bbbfc00ec4 (diff)
downloadgo-4879151d1dc9f951e4598bd433cd2142976ed39d.tar.xz
cmd/compile: introduce alias analysis and automatically free non-aliased memory after growslice
This CL is part of a set of CLs that attempt to reduce how much work the GC must do. See the design in https://go.dev/design/74299-runtime-freegc This CL updates the compiler to examine append calls to prove whether or not the slice is aliased. If proven unaliased, the compiler automatically inserts a call to a new runtime function introduced with this CL, runtime.growsliceNoAlias, which frees the old backing memory immediately after slice growth is complete and the old storage is logically dead. Two append benchmarks below show promising results, executing up to ~2x faster and up to factor of ~3 memory reduction with this CL. The approach works with multiple append calls for the same slice, including inside loops, and the final slice memory can be escaping, such as in a classic pattern of returning a slice from a function after the slice is built. (The final slice memory is never freed with this CL, though we have other work that tackles that.) An example target for this CL is we automatically free the intermediate memory for the appends in the loop in this function: func f1(input []int) []int { var s []int for _, x := range input { s = append(s, g(x)) // s cannot be aliased here if h(x) { s = append(s, x) // s cannot be aliased here } } return s // slice escapes at end } In this case, the compiler and the runtime collaborate so that the heap allocated backing memory for s is automatically freed after a successful grow. (For the first grow, there is nothing to free, but for the second and subsequent growths, the old heap memory is freed automatically.) The new runtime.growsliceNoAlias is primarily implemented by calling runtime.freegc, which we introduced in CL 673695. The high-level approach here is we step through the IR starting from a slice declaration and look for any operations that either alias the slice or might do so, and treat any IR construct we don't specifically handle as a potential alias (and therefore conservatively fall back to treating the slice as aliased when encountering something not understood). For loops, some additional care is required. We arrange the analysis so that an alias in the body of a loop causes all the appends in that same loop body to be marked aliased, even if the aliasing occurs after the append in the IR: func f2() { var s []int for i := range 10 { s = append(s, i) // aliased due to next line alias = s } } For nested loops, we analyse the nesting appropriately so that for example this append is still proven as non-aliased in the inner loop even though it aliased for the outer loop: func f3() { for range 10 { var s []int for i := range 10 { s = append(s, i) // append using non-aliased slice } alias = s } } A good starting point is the beginning of the test/escape_alias.go file, which starts with ~10 introductory examples with brief comments that attempt to illustrate the high-level approach. For more details, see the new .../internal/escape/alias.go file, especially the (*aliasAnalysis).analyze method. In the first benchmark, an append in a loop builds up a slice from nothing, where the slice elements are each 64 bytes. In the table below, 'count' is the number of appends. With 1 append, there is no opportunity for this CL to free memory. Once there are 2 appends, the growth from 1 element to 2 elements means the compiler-inserted growsliceNoAlias frees the 1-element array, and we see a ~33% reduction in memory use and a small reported speed improvement. As the number of appends increases for example to 5, we are at a ~20% speed improvement and ~45% memory reduction, and so on until we reach ~40% faster and ~50% less memory allocated at the end of the table. There can be variation in the reported numbers based on -randlayout, so this table is for 30 different values of -randlayout with a total n=150. (Even so, there is still some variation, so we probably should not read too much into small changes.) This is with GOAMD64=v3 on a VM that gcc reports is cascadelake. goos: linux goarch: amd64 pkg: runtime cpu: Intel(R) Xeon(R) CPU @ 2.80GHz │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ sec/op │ sec/op vs base │ Append64Bytes/count=1-4 31.09n ± 2% 31.69n ± 1% +1.95% (n=150) Append64Bytes/count=2-4 73.31n ± 1% 70.27n ± 0% -4.15% (n=150) Append64Bytes/count=3-4 142.7n ± 1% 124.6n ± 1% -12.68% (n=150) Append64Bytes/count=4-4 149.6n ± 1% 127.7n ± 0% -14.64% (n=150) Append64Bytes/count=5-4 277.1n ± 1% 213.6n ± 0% -22.90% (n=150) Append64Bytes/count=6-4 280.7n ± 1% 216.5n ± 1% -22.87% (n=150) Append64Bytes/count=10-4 544.3n ± 1% 386.6n ± 0% -28.97% (n=150) Append64Bytes/count=20-4 1058.5n ± 1% 715.6n ± 1% -32.39% (n=150) Append64Bytes/count=50-4 2.121µ ± 1% 1.404µ ± 1% -33.83% (n=150) Append64Bytes/count=100-4 4.152µ ± 1% 2.736µ ± 1% -34.11% (n=150) Append64Bytes/count=200-4 7.753µ ± 1% 4.882µ ± 1% -37.03% (n=150) Append64Bytes/count=400-4 15.163µ ± 2% 9.273µ ± 1% -38.84% (n=150) geomean 601.8n 455.0n -24.39% │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ B/op │ B/op vs base │ Append64Bytes/count=1-4 64.00 ± 0% 64.00 ± 0% ~ (n=150) Append64Bytes/count=2-4 192.0 ± 0% 128.0 ± 0% -33.33% (n=150) Append64Bytes/count=3-4 448.0 ± 0% 256.0 ± 0% -42.86% (n=150) Append64Bytes/count=4-4 448.0 ± 0% 256.0 ± 0% -42.86% (n=150) Append64Bytes/count=5-4 960.0 ± 0% 512.0 ± 0% -46.67% (n=150) Append64Bytes/count=6-4 960.0 ± 0% 512.0 ± 0% -46.67% (n=150) Append64Bytes/count=10-4 1.938Ki ± 0% 1.000Ki ± 0% -48.39% (n=150) Append64Bytes/count=20-4 3.938Ki ± 0% 2.001Ki ± 0% -49.18% (n=150) Append64Bytes/count=50-4 7.938Ki ± 0% 4.005Ki ± 0% -49.54% (n=150) Append64Bytes/count=100-4 15.938Ki ± 0% 8.021Ki ± 0% -49.67% (n=150) Append64Bytes/count=200-4 31.94Ki ± 0% 16.08Ki ± 0% -49.64% (n=150) Append64Bytes/count=400-4 63.94Ki ± 0% 32.33Ki ± 0% -49.44% (n=150) geomean 1.991Ki 1.124Ki -43.54% │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ allocs/op │ allocs/op vs base │ Append64Bytes/count=1-4 1.000 ± 0% 1.000 ± 0% ~ (n=150) Append64Bytes/count=2-4 2.000 ± 0% 1.000 ± 0% -50.00% (n=150) Append64Bytes/count=3-4 3.000 ± 0% 1.000 ± 0% -66.67% (n=150) Append64Bytes/count=4-4 3.000 ± 0% 1.000 ± 0% -66.67% (n=150) Append64Bytes/count=5-4 4.000 ± 0% 1.000 ± 0% -75.00% (n=150) Append64Bytes/count=6-4 4.000 ± 0% 1.000 ± 0% -75.00% (n=150) Append64Bytes/count=10-4 5.000 ± 0% 1.000 ± 0% -80.00% (n=150) Append64Bytes/count=20-4 6.000 ± 0% 1.000 ± 0% -83.33% (n=150) Append64Bytes/count=50-4 7.000 ± 0% 1.000 ± 0% -85.71% (n=150) Append64Bytes/count=100-4 8.000 ± 0% 1.000 ± 0% -87.50% (n=150) Append64Bytes/count=200-4 9.000 ± 0% 1.000 ± 0% -88.89% (n=150) Append64Bytes/count=400-4 10.000 ± 0% 1.000 ± 0% -90.00% (n=150) geomean 4.331 1.000 -76.91% The second benchmark is similar, but instead uses an 8-byte integer for the slice element. The first 4 appends in the loop never call into the runtime thanks to the excellent CL 664299 introduced by Keith in Go 1.25 that allows some <= 32 byte dynamically-sized slices to be on the stack, so this CL is neutral for <= 32 bytes. Once the 5th append occurs at count=5, a grow happens via the runtime and heap allocates as normal, but freegc does not yet have anything to free, so we see a small ~1.4ns penalty reported there. But once the second growth happens, the older heap memory is now automatically freed by freegc, so we start to see some benefit in memory reductions and speed improvements, starting at a tiny speed improvement (close to a wash, or maybe noise) by the second growth before count=10, and building up to ~2x faster with ~68% fewer allocated bytes reported. goos: linux goarch: amd64 pkg: runtime cpu: Intel(R) Xeon(R) CPU @ 2.80GHz │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ sec/op │ sec/op vs base │ AppendInt/count=1-4 2.978n ± 0% 2.969n ± 0% -0.30% (p=0.000 n=150) AppendInt/count=4-4 4.292n ± 3% 4.163n ± 3% ~ (p=0.528 n=150) AppendInt/count=5-4 33.50n ± 0% 34.93n ± 0% +4.25% (p=0.000 n=150) AppendInt/count=10-4 76.21n ± 1% 75.67n ± 0% -0.72% (p=0.000 n=150) AppendInt/count=20-4 150.6n ± 1% 133.0n ± 0% -11.65% (n=150) AppendInt/count=50-4 284.1n ± 1% 225.6n ± 0% -20.59% (n=150) AppendInt/count=100-4 544.2n ± 1% 392.4n ± 1% -27.89% (n=150) AppendInt/count=200-4 1051.5n ± 1% 702.3n ± 0% -33.21% (n=150) AppendInt/count=400-4 2.041µ ± 1% 1.312µ ± 1% -35.70% (n=150) AppendInt/count=1000-4 5.224µ ± 2% 2.851µ ± 1% -45.43% (n=150) AppendInt/count=2000-4 11.770µ ± 1% 6.010µ ± 1% -48.94% (n=150) AppendInt/count=3000-4 17.747µ ± 2% 8.264µ ± 1% -53.44% (n=150) geomean 331.8n 246.4n -25.72% │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ B/op │ B/op vs base │ AppendInt/count=1-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=150) AppendInt/count=4-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=150) AppendInt/count=5-4 64.00 ± 0% 64.00 ± 0% ~ (p=1.000 n=150) AppendInt/count=10-4 192.0 ± 0% 128.0 ± 0% -33.33% (n=150) AppendInt/count=20-4 448.0 ± 0% 256.0 ± 0% -42.86% (n=150) AppendInt/count=50-4 960.0 ± 0% 512.0 ± 0% -46.67% (n=150) AppendInt/count=100-4 1.938Ki ± 0% 1.000Ki ± 0% -48.39% (n=150) AppendInt/count=200-4 3.938Ki ± 0% 2.001Ki ± 0% -49.18% (n=150) AppendInt/count=400-4 7.938Ki ± 0% 4.005Ki ± 0% -49.54% (n=150) AppendInt/count=1000-4 24.56Ki ± 0% 10.05Ki ± 0% -59.07% (n=150) AppendInt/count=2000-4 58.56Ki ± 0% 20.31Ki ± 0% -65.32% (n=150) AppendInt/count=3000-4 85.19Ki ± 0% 27.30Ki ± 0% -67.95% (n=150) geomean ² -42.81% │ old-1bb1f2bf0c │ freegc-8ba7421-ps16 │ │ allocs/op │ allocs/op vs base │ AppendInt/count=1-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=150) AppendInt/count=4-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=150) AppendInt/count=5-4 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=150) AppendInt/count=10-4 2.000 ± 0% 1.000 ± 0% -50.00% (n=150) AppendInt/count=20-4 3.000 ± 0% 1.000 ± 0% -66.67% (n=150) AppendInt/count=50-4 4.000 ± 0% 1.000 ± 0% -75.00% (n=150) AppendInt/count=100-4 5.000 ± 0% 1.000 ± 0% -80.00% (n=150) AppendInt/count=200-4 6.000 ± 0% 1.000 ± 0% -83.33% (n=150) AppendInt/count=400-4 7.000 ± 0% 1.000 ± 0% -85.71% (n=150) AppendInt/count=1000-4 9.000 ± 0% 1.000 ± 0% -88.89% (n=150) AppendInt/count=2000-4 11.000 ± 0% 1.000 ± 0% -90.91% (n=150) AppendInt/count=3000-4 12.000 ± 0% 1.000 ± 0% -91.67% (n=150) geomean ² -72.76% ² Of course, these are just microbenchmarks, but likely indicate there are some opportunities here. The immediately following CL 712422 tackles inlining and is able to get runtime.freegc working automatically with iterators such as used by slices.Collect, which becomes able to automatically free the intermediate memory from its repeated appends (which earlier in this work required a temporary hand edit to the slices package). For now, we only use the NoAlias version for element types without pointers while waiting on additional runtime support in CL 698515. Updates #74299 Change-Id: I1b9d286aa97c170dcc2e203ec0f8ca72d84e8221 Reviewed-on: https://go-review.googlesource.com/c/go/+/710015 Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal')
-rw-r--r--src/cmd/compile/internal/base/debug.go3
-rw-r--r--src/cmd/compile/internal/base/flag.go1
-rw-r--r--src/cmd/compile/internal/escape/alias.go528
-rw-r--r--src/cmd/compile/internal/escape/escape.go11
-rw-r--r--src/cmd/compile/internal/inline/interleaved/interleaved.go7
-rw-r--r--src/cmd/compile/internal/ir/expr.go19
-rw-r--r--src/cmd/compile/internal/ir/symtab.go2
-rw-r--r--src/cmd/compile/internal/ssa/_gen/generic.rules9
-rw-r--r--src/cmd/compile/internal/ssa/rewritegeneric.go16
-rw-r--r--src/cmd/compile/internal/ssagen/ssa.go23
-rw-r--r--src/cmd/compile/internal/test/free_test.go55
-rw-r--r--src/cmd/compile/internal/typecheck/_builtin/runtime.go2
-rw-r--r--src/cmd/compile/internal/typecheck/builtin.go2
13 files changed, 657 insertions, 21 deletions
diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go
index e32a07d461..5e0268bb88 100644
--- a/src/cmd/compile/internal/base/debug.go
+++ b/src/cmd/compile/internal/base/debug.go
@@ -32,9 +32,12 @@ type DebugFlags struct {
DwarfInl int `help:"print information about DWARF inlined function creation"`
EscapeMutationsCalls int `help:"print extra escape analysis diagnostics about mutations and calls" concurrent:"ok"`
EscapeDebug int `help:"print information about escape analysis and resulting optimizations" concurrent:"ok"`
+ EscapeAlias int `help:"print information about alias analysis" concurrent:"ok"`
+ EscapeAliasCheck int `help:"enable additional validation for alias analysis" concurrent:"ok"`
Export int `help:"print export data"`
FIPSHash string `help:"hash value for FIPS debugging" concurrent:"ok"`
Fmahash string `help:"hash value for use in debugging platform-dependent multiply-add use" concurrent:"ok"`
+ FreeAppend int `help:"insert frees of append results when proven safe (0 disabled, 1 enabled, 2 enabled + log)" concurrent:"ok"`
GCAdjust int `help:"log adjustments to GOGC" concurrent:"ok"`
GCCheck int `help:"check heap/gc use by compiler" concurrent:"ok"`
GCProg int `help:"print dump of GC programs"`
diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go
index 63cae41524..4a2b7c5434 100644
--- a/src/cmd/compile/internal/base/flag.go
+++ b/src/cmd/compile/internal/base/flag.go
@@ -182,6 +182,7 @@ func ParseFlags() {
Debug.AlignHot = 1
Debug.InlFuncsWithClosures = 1
Debug.InlStaticInit = 1
+ Debug.FreeAppend = 1
Debug.PGOInline = 1
Debug.PGODevirtualize = 2
Debug.SyncFrames = -1 // disable sync markers by default
diff --git a/src/cmd/compile/internal/escape/alias.go b/src/cmd/compile/internal/escape/alias.go
new file mode 100644
index 0000000000..f0351e8f67
--- /dev/null
+++ b/src/cmd/compile/internal/escape/alias.go
@@ -0,0 +1,528 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package escape
+
+import (
+ "cmd/compile/internal/base"
+ "cmd/compile/internal/ir"
+ "cmd/internal/src"
+ "fmt"
+ "maps"
+ "path/filepath"
+)
+
+type aliasAnalysis struct {
+ // fn is the function being analyzed.
+ fn *ir.Func
+
+ // candidateSlices are declared slices that
+ // start unaliased and might still be unaliased.
+ candidateSlices map[*ir.Name]candidateSlice
+
+ // noAliasAppends are appends that have been
+ // proven to use an unaliased slice.
+ noAliasAppends []*ir.CallExpr
+
+ // loops is a stack of observed loops,
+ // each with a list of candidate appends.
+ loops [][]candidateAppend
+
+ // State for optional validation checking (doubleCheck mode):
+ processed map[ir.Node]int // count of times each node was processed, for doubleCheck mode
+ doubleCheck bool // whether to do doubleCheck mode
+}
+
+// candidateSlice tracks information about a declared slice
+// that might be unaliased.
+type candidateSlice struct {
+ loopDepth int // depth of loop when slice was declared
+}
+
+// candidateAppend tracks information about an OAPPEND that
+// might be using an unaliased slice.
+type candidateAppend struct {
+ s *ir.Name // the slice argument in 's = append(s, ...)'
+ call *ir.CallExpr // the append call
+}
+
+// aliasAnalysis looks for specific patterns of slice usage and proves
+// that certain appends are operating on non-aliased slices.
+//
+// This allows us to emit calls to free the backing arrays for certain
+// non-aliased slices at runtime when we know the memory is logically dead.
+//
+// The analysis is conservative, giving up on any operation we do not
+// explicitly understand.
+func (aa *aliasAnalysis) analyze(fn *ir.Func) {
+ // Walk the function body to discover slice declarations, their uses,
+ // and any append that we can prove is using an unaliased slice.
+ //
+ // An example is:
+ //
+ // var s []T
+ // for _, v := range input {
+ // f()
+ // s = append(s, g(v)) // s cannot be aliased here
+ // h()
+ // }
+ // return s
+ //
+ // Here, we can prove that the append to s is operating on an unaliased slice,
+ // and that conclusion is unaffected by s later being returned and escaping.
+ //
+ // In contrast, in this example, the aliasing of s in the loop body means the
+ // append can be operating on an aliased slice, so we do not record s as unaliased:
+ //
+ // var s []T
+ // var alias []T
+ // for _, v := range input {
+ // s = append(s, v) // s is aliased on second pass through loop body
+ // alias = s
+ // }
+ //
+ // Arbitrary uses of s after an append do not affect the aliasing conclusion
+ // for that append, but only if the append cannot be revisited at execution time
+ // via a loop or goto.
+ //
+ // We track the loop depth when a slice was declared and verify all uses of a slice
+ // are non-aliasing until we return to that depth. In other words, we make sure
+ // we have processed any possible execution-time revisiting of the slice prior
+ // to making our final determination.
+ //
+ // This approach helps for example with nested loops, such as:
+ //
+ // var s []int
+ // for range 10 {
+ // for range 10 {
+ // s = append(s, 0) // s is proven as non-aliased here
+ // }
+ // }
+ // alias = s // both loops are complete
+ //
+ // Or in contrast:
+ //
+ // var s []int
+ // for range 10 {
+ // for range 10 {
+ // s = append(s, 0) // s is treated as aliased here
+ // }
+ // alias = s // aliased, and outermost loop cycles back
+ // }
+ //
+ // As we walk the function, we look for things like:
+ //
+ // 1. Slice declarations (currently supporting 'var s []T', 's := make([]T, ...)',
+ // and 's := []T{...}').
+ // 2. Appends to a slice of the form 's = append(s, ...)'.
+ // 3. Other uses of the slice, which we treat as potential aliasing outside
+ // of a few known safe cases.
+ // 4. A start of a loop, which we track in a stack so that
+ // any uses of a slice within a loop body are treated as potential
+ // aliasing, including statements in the loop body after an append.
+ // Candidate appends are stored in the loop stack at the loop depth of their
+ // corresponding slice declaration (rather than the loop depth of the append),
+ // which essentially postpones a decision about the candidate append.
+ // 5. An end of a loop, which pops the loop stack and allows us to
+ // conclusively treat candidate appends from the loop body based
+ // on the loop depth of the slice declaration.
+ //
+ // Note that as we pop a candidate append at the end of a loop, we know
+ // its corresponding slice was unaliased throughout the loop being popped
+ // if the slice is still in the candidate slice map (without having been
+ // removed for potential aliasing), and we know we can make a final decision
+ // about a candidate append if we have returned to the loop depth
+ // where its slice was declared. In other words, there is no unanalyzed
+ // control flow that could take us back at execution-time to the
+ // candidate append in the now analyzed loop. This helps for example
+ // with nested loops, such as in our examples just above.
+ //
+ // We give up on a particular candidate slice if we see any use of it
+ // that we don't explicitly understand, and we give up on all of
+ // our candidate slices if we see any goto or label, which could be
+ // unstructured control flow. (TODO(thepudds): we remove the goto/label
+ // restriction in a subsequent CL.)
+ //
+ // Note that the intended use is to indicate that a slice is safe to pass
+ // to runtime.freegc, which currently requires that the passed pointer
+ // point to the base of its heap object.
+ //
+ // Therefore, we currently do not allow any re-slicing of the slice, though we could
+ // potentially allow s[0:x] or s[:x] or similar. (Slice expressions that alter
+ // the capacity might be possible to allow with freegc changes, though they are
+ // currently disallowed here like all slice expressions).
+ //
+ // TODO(thepudds): we could support the slice being used as non-escaping function call parameter
+ // but to do that, we need to verify any creation of specials via user code triggers an escape,
+ // or mail better runtime.freegc support for specials, or have a temporary compile-time solution
+ // for specials. (Currently, this analysis side-steps specials because any use of a slice
+ // that might cause a user-created special will cause it to be treated as aliased, and
+ // separately, runtime.freegc handles profiling-related specials).
+
+ // Initialize.
+ aa.fn = fn
+ aa.candidateSlices = make(map[*ir.Name]candidateSlice) // slices that might be unaliased
+
+ // doubleCheck controls whether we do a sanity check of our processing logic
+ // by counting each node visited in our main pass, and then comparing those counts
+ // against a simple walk at the end. The main intent is to help catch missing
+ // any nodes squirreled away in some spot we forgot to examine in our main pass.
+ aa.doubleCheck = base.Debug.EscapeAliasCheck > 0
+ aa.processed = make(map[ir.Node]int)
+
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(fn.Pos(), fn, "====== starting func", "======")
+ }
+
+ ir.DoChildren(fn, aa.visit)
+
+ for _, call := range aa.noAliasAppends {
+ if base.Debug.EscapeAlias >= 1 {
+ base.WarnfAt(call.Pos(), "alias analysis: append using non-aliased slice: %v in func %v",
+ call, fn)
+ }
+ if base.Debug.FreeAppend > 0 {
+ call.AppendNoAlias = true
+ }
+ }
+
+ if aa.doubleCheck {
+ doubleCheckProcessed(fn, aa.processed)
+ }
+}
+
+func (aa *aliasAnalysis) visit(n ir.Node) bool {
+ if n == nil {
+ return false
+ }
+
+ if base.Debug.EscapeAlias >= 3 {
+ fmt.Printf("%-25s alias analysis: visiting node: %12s %-18T %v\n",
+ fmtPosShort(n.Pos())+":", n.Op().String(), n, n)
+ }
+
+ // As we visit nodes, we want to ensure we handle all children
+ // without missing any (through ignorance or future changes).
+ // We do this by counting nodes as we visit them or otherwise
+ // declare a node to be fully processed.
+ //
+ // In particular, we want to ensure we don't miss the use
+ // of a slice in some expression that might be an aliasing usage.
+ //
+ // When doubleCheck is enabled, we compare the counts
+ // accumulated in our analysis against counts from a trivial walk,
+ // failing if there is any mismatch.
+ //
+ // This call here counts that we have visited this node n
+ // via our main visit method. (In contrast, some nodes won't
+ // be visited by the main visit method, but instead will be
+ // manually marked via countProcessed when we believe we have fully
+ // dealt with the node).
+ aa.countProcessed(n)
+
+ switch n.Op() {
+ case ir.ODCL:
+ decl := n.(*ir.Decl)
+
+ if decl.X != nil && decl.X.Type().IsSlice() && decl.X.Class == ir.PAUTO {
+ s := decl.X
+ if _, ok := aa.candidateSlices[s]; ok {
+ base.FatalfAt(n.Pos(), "candidate slice already tracked as candidate: %v", s)
+ }
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), s, "adding candidate slice", "(loop depth: %d)", len(aa.loops))
+ }
+ aa.candidateSlices[s] = candidateSlice{loopDepth: len(aa.loops)}
+ }
+ // No children aside from the declared ONAME.
+ aa.countProcessed(decl.X)
+ return false
+
+ case ir.ONAME:
+
+ // We are seeing a name we have not already handled in another case,
+ // so remove any corresponding candidate slice.
+ if n.Type().IsSlice() {
+ name := n.(*ir.Name)
+ _, ok := aa.candidateSlices[name]
+ if ok {
+ delete(aa.candidateSlices, name)
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), name, "removing candidate slice", "")
+ }
+ }
+ }
+ // No children.
+ return false
+
+ case ir.OAS2:
+ n := n.(*ir.AssignListStmt)
+ aa.analyzeAssign(n, n.Lhs, n.Rhs)
+ return false
+
+ case ir.OAS:
+ assign := n.(*ir.AssignStmt)
+ aa.analyzeAssign(n, []ir.Node{assign.X}, []ir.Node{assign.Y})
+ return false
+
+ case ir.OFOR, ir.ORANGE:
+ aa.visitList(n.Init())
+
+ if n.Op() == ir.ORANGE {
+ // TODO(thepudds): previously we visited this range expression
+ // in the switch just below, after pushing the loop. This current placement
+ // is more correct, but generate a test or find an example in stdlib or similar
+ // where it matters. (Our current tests do not complain.)
+ aa.visit(n.(*ir.RangeStmt).X)
+ }
+
+ // Push a new loop.
+ aa.loops = append(aa.loops, nil)
+
+ // Process the loop.
+ switch n.Op() {
+ case ir.OFOR:
+ forstmt := n.(*ir.ForStmt)
+ aa.visit(forstmt.Cond)
+ aa.visitList(forstmt.Body)
+ aa.visit(forstmt.Post)
+ case ir.ORANGE:
+ rangestmt := n.(*ir.RangeStmt)
+ aa.visit(rangestmt.Key)
+ aa.visit(rangestmt.Value)
+ aa.visitList(rangestmt.Body)
+ default:
+ base.Fatalf("loop not OFOR or ORANGE: %v", n)
+ }
+
+ // Pop the loop.
+ var candidateAppends []candidateAppend
+ candidateAppends, aa.loops = aa.loops[len(aa.loops)-1], aa.loops[:len(aa.loops)-1]
+ for _, a := range candidateAppends {
+ // We are done with the loop, so we can validate any candidate appends
+ // that have not had their slice removed yet. We know a slice is unaliased
+ // throughout the loop if the slice is still in the candidate slice map.
+ if cs, ok := aa.candidateSlices[a.s]; ok {
+ if cs.loopDepth == len(aa.loops) {
+ // We've returned to the loop depth where the slice was declared and
+ // hence made it all the way through any loops that started after
+ // that declaration.
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), a.s, "proved non-aliased append",
+ "(completed loop, decl at depth: %d)", cs.loopDepth)
+ }
+ aa.noAliasAppends = append(aa.noAliasAppends, a.call)
+ } else if cs.loopDepth < len(aa.loops) {
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), a.s, "cannot prove non-aliased append",
+ "(completed loop, decl at depth: %d)", cs.loopDepth)
+ }
+ } else {
+ panic("impossible: candidate slice loopDepth > current loop depth")
+ }
+ }
+ }
+ return false
+
+ case ir.OLEN, ir.OCAP:
+ n := n.(*ir.UnaryExpr)
+ if n.X.Op() == ir.ONAME {
+ // This does not disqualify a candidate slice.
+ aa.visitList(n.Init())
+ aa.countProcessed(n.X)
+ } else {
+ ir.DoChildren(n, aa.visit)
+ }
+ return false
+
+ case ir.OCLOSURE:
+ // Give up on all our in-progress slices.
+ closure := n.(*ir.ClosureExpr)
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), closure.Func, "clearing all in-progress slices due to OCLOSURE",
+ "(was %d in-progress slices)", len(aa.candidateSlices))
+ }
+ clear(aa.candidateSlices)
+ return ir.DoChildren(n, aa.visit)
+
+ case ir.OLABEL, ir.OGOTO:
+ // Give up on all our in-progress slices.
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(n.Pos(), n, "clearing all in-progress slices due to label or goto",
+ "(was %d in-progress slices)", len(aa.candidateSlices))
+ }
+ clear(aa.candidateSlices)
+ return false
+
+ default:
+ return ir.DoChildren(n, aa.visit)
+ }
+}
+
+func (aa *aliasAnalysis) visitList(nodes []ir.Node) {
+ for _, n := range nodes {
+ aa.visit(n)
+ }
+}
+
+// analyzeAssign evaluates the assignment dsts... = srcs...
+//
+// assign is an *ir.AssignStmt or *ir.AssignListStmt.
+func (aa *aliasAnalysis) analyzeAssign(assign ir.Node, dsts, srcs []ir.Node) {
+ aa.visitList(assign.Init())
+ for i := range dsts {
+ dst := dsts[i]
+ src := srcs[i]
+
+ if dst.Op() != ir.ONAME || !dst.Type().IsSlice() {
+ // Nothing for us to do aside from visiting the remaining children.
+ aa.visit(dst)
+ aa.visit(src)
+ continue
+ }
+
+ // We have a slice being assigned to an ONAME.
+
+ // Check for simple zero value assignments to an ONAME, which we ignore.
+ if src == nil {
+ aa.countProcessed(dst)
+ continue
+ }
+
+ if base.Debug.EscapeAlias >= 4 {
+ srcfn := ""
+ if src.Op() == ir.ONAME {
+ srcfn = fmt.Sprintf("%v.", src.Name().Curfn)
+ }
+ aa.diag(assign.Pos(), assign, "visiting slice assignment", "%v.%v = %s%v (%s %T = %s %T)",
+ dst.Name().Curfn, dst, srcfn, src, dst.Op().String(), dst, src.Op().String(), src)
+ }
+
+ // Now check what we have on the RHS.
+ switch src.Op() {
+ // Cases:
+
+ // Check for s := make([]T, ...) or s := []T{...}, along with the '=' version
+ // of those which does not alias s as long as s is not used in the make.
+ //
+ // TODO(thepudds): we need to be sure that 's := []T{1,2,3}' does not end up backed by a
+ // global static. Ad-hoc testing indicates that example and similar seem to be
+ // stack allocated, but that was not exhaustive testing. We do have runtime.freegc
+ // able to throw if it finds a global static, but should test more.
+ //
+ // TODO(thepudds): could also possibly allow 's := append([]T(nil), ...)'
+ // and 's := append([]T{}, ...)'.
+ case ir.OMAKESLICE, ir.OSLICELIT:
+ name := dst.(*ir.Name)
+ if name.Class == ir.PAUTO {
+ if base.Debug.EscapeAlias > 1 {
+ aa.diag(assign.Pos(), assign, "assignment from make or slice literal", "")
+ }
+ // If this is Def=true, the ODCL in the init will causes this to be tracked
+ // as a candidate slice. We walk the init and RHS but avoid visiting the name
+ // in the LHS, which would remove the slice from the candidate list after it
+ // was just added.
+ aa.visit(src)
+ aa.countProcessed(name)
+ continue
+ }
+
+ // Check for s = append(s, <...>).
+ case ir.OAPPEND:
+ s := dst.(*ir.Name)
+ call := src.(*ir.CallExpr)
+ if call.Args[0] == s {
+ // Matches s = append(s, <...>).
+ // First visit other arguments in case they use s.
+ aa.visitList(call.Args[1:])
+ // Mark the call as processed, and s twice.
+ aa.countProcessed(s, call, s)
+
+ // We have now examined all non-ONAME children of assign.
+
+ // This is now the heart of the analysis.
+ // Check to see if this slice is a live candidate.
+ cs, ok := aa.candidateSlices[s]
+ if ok {
+ if cs.loopDepth == len(aa.loops) {
+ // No new loop has started after the declaration of s,
+ // so this is definitive.
+ if base.Debug.EscapeAlias >= 2 {
+ aa.diag(assign.Pos(), assign, "proved non-aliased append",
+ "(loop depth: %d, equals decl depth)", len(aa.loops))
+ }
+ aa.noAliasAppends = append(aa.noAliasAppends, call)
+ } else if cs.loopDepth < len(aa.loops) {
+ // A new loop has started since the declaration of s,
+ // so we can't validate this append yet, but
+ // remember it in case we can validate it later when
+ // all loops using s are done.
+ aa.loops[cs.loopDepth] = append(aa.loops[cs.loopDepth],
+ candidateAppend{s: s, call: call})
+ } else {
+ panic("impossible: candidate slice loopDepth > current loop depth")
+ }
+ }
+ continue
+ }
+ } // End of switch on src.Op().
+
+ // Reached bottom of the loop over assignments.
+ // If we get here, we need to visit the dst and src normally.
+ aa.visit(dst)
+ aa.visit(src)
+ }
+}
+
+func (aa *aliasAnalysis) countProcessed(nodes ...ir.Node) {
+ if aa.doubleCheck {
+ for _, n := range nodes {
+ aa.processed[n]++
+ }
+ }
+}
+
+func (aa *aliasAnalysis) diag(pos src.XPos, n ir.Node, what string, format string, args ...any) {
+ fmt.Printf("%-25s alias analysis: %-30s %-20s %s\n",
+ fmtPosShort(pos)+":",
+ what+":",
+ fmt.Sprintf("%v", n),
+ fmt.Sprintf(format, args...))
+}
+
+// doubleCheckProcessed does a sanity check for missed nodes in our visit.
+func doubleCheckProcessed(fn *ir.Func, processed map[ir.Node]int) {
+ // Do a trivial walk while counting the nodes
+ // to compare against the counts in processed.
+
+ observed := make(map[ir.Node]int)
+ var walk func(n ir.Node) bool
+ walk = func(n ir.Node) bool {
+ observed[n]++
+ return ir.DoChildren(n, walk)
+ }
+ ir.DoChildren(fn, walk)
+
+ if !maps.Equal(processed, observed) {
+ // The most likely mistake might be something was missed while building processed,
+ // so print extra details in that direction.
+ for n, observedCount := range observed {
+ processedCount, ok := processed[n]
+ if processedCount != observedCount || !ok {
+ base.WarnfAt(n.Pos(),
+ "alias analysis: mismatch for %T: %v: processed %d times, observed %d times",
+ n, n, processedCount, observedCount)
+ }
+ }
+ base.FatalfAt(fn.Pos(), "alias analysis: mismatch in visited nodes")
+ }
+}
+
+func fmtPosShort(xpos src.XPos) string {
+ // TODO(thepudds): I think I did this a simpler way a while ago? Or maybe add base.FmtPosShort
+ // or similar? Or maybe just use base.FmtPos and give up on nicely aligned log messages?
+ pos := base.Ctxt.PosTable.Pos(xpos)
+ shortLine := filepath.Base(pos.AbsFilename()) + ":" + pos.LineNumber()
+ return shortLine
+}
diff --git a/src/cmd/compile/internal/escape/escape.go b/src/cmd/compile/internal/escape/escape.go
index 59250edfef..9d01156eb8 100644
--- a/src/cmd/compile/internal/escape/escape.go
+++ b/src/cmd/compile/internal/escape/escape.go
@@ -8,6 +8,7 @@ import (
"fmt"
"go/constant"
"go/token"
+ "internal/goexperiment"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
@@ -369,6 +370,16 @@ func (b *batch) finish(fns []*ir.Func) {
}
}
}
+
+ if goexperiment.RuntimeFreegc {
+ // Look for specific patterns of usage, such as appends
+ // to slices that we can prove are not aliased.
+ for _, fn := range fns {
+ a := aliasAnalysis{}
+ a.analyze(fn)
+ }
+ }
+
}
// inMutualBatch reports whether function fn is in the batch of
diff --git a/src/cmd/compile/internal/inline/interleaved/interleaved.go b/src/cmd/compile/internal/inline/interleaved/interleaved.go
index 80a0cb97df..d66f1a84cc 100644
--- a/src/cmd/compile/internal/inline/interleaved/interleaved.go
+++ b/src/cmd/compile/internal/inline/interleaved/interleaved.go
@@ -20,6 +20,13 @@ import (
// DevirtualizeAndInlinePackage interleaves devirtualization and inlining on
// all functions within pkg.
func DevirtualizeAndInlinePackage(pkg *ir.Package, profile *pgoir.Profile) {
+ if base.Flag.W > 1 {
+ for _, fn := range typecheck.Target.Funcs {
+ s := fmt.Sprintf("\nbefore devirtualize-and-inline %v", fn.Sym())
+ ir.DumpList(s, fn.Body)
+ }
+ }
+
if profile != nil && base.Debug.PGODevirtualize > 0 {
// TODO(mdempsky): Integrate into DevirtualizeAndInlineFunc below.
ir.VisitFuncsBottomUp(typecheck.Target.Funcs, func(list []*ir.Func, recursive bool) {
diff --git a/src/cmd/compile/internal/ir/expr.go b/src/cmd/compile/internal/ir/expr.go
index 7f156b5e75..f32998f333 100644
--- a/src/cmd/compile/internal/ir/expr.go
+++ b/src/cmd/compile/internal/ir/expr.go
@@ -184,15 +184,16 @@ func (n *BinaryExpr) SetOp(op Op) {
// A CallExpr is a function call Fun(Args).
type CallExpr struct {
miniExpr
- Fun Node
- Args Nodes
- DeferAt Node
- RType Node `mknode:"-"` // see reflectdata/helpers.go
- KeepAlive []*Name // vars to be kept alive until call returns
- IsDDD bool
- GoDefer bool // whether this call is part of a go or defer statement
- NoInline bool // whether this call must not be inlined
- UseBuf bool // use stack buffer for backing store (OAPPEND only)
+ Fun Node
+ Args Nodes
+ DeferAt Node
+ RType Node `mknode:"-"` // see reflectdata/helpers.go
+ KeepAlive []*Name // vars to be kept alive until call returns
+ IsDDD bool
+ GoDefer bool // whether this call is part of a go or defer statement
+ NoInline bool // whether this call must not be inlined
+ UseBuf bool // use stack buffer for backing store (OAPPEND only)
+ AppendNoAlias bool // backing store proven to be unaliased (OAPPEND only)
// whether it's a runtime.KeepAlive call the compiler generates to
// keep a variable alive. See #73137.
IsCompilerVarLive bool
diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go
index 3229735464..f0e91f6db3 100644
--- a/src/cmd/compile/internal/ir/symtab.go
+++ b/src/cmd/compile/internal/ir/symtab.go
@@ -30,6 +30,8 @@ type symsStruct struct {
Goschedguarded *obj.LSym
Growslice *obj.LSym
GrowsliceBuf *obj.LSym
+ GrowsliceBufNoAlias *obj.LSym
+ GrowsliceNoAlias *obj.LSym
MoveSlice *obj.LSym
MoveSliceNoScan *obj.LSym
MoveSliceNoCap *obj.LSym
diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules
index 7710f6f209..fe8fc5b262 100644
--- a/src/cmd/compile/internal/ssa/_gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/_gen/generic.rules
@@ -2054,8 +2054,13 @@
// See issue 56440.
// Note there are 2 rules here, one for the pre-decomposed []T result and one for
// the post-decomposed (*T,int,int) result. (The latter is generated after call expansion.)
-(SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const(64|32)) _ _ _ _))) && isSameCall(sym, "runtime.growslice") => newLen
-(SelectN [1] (StaticCall {sym} _ newLen:(Const(64|32)) _ _ _ _)) && v.Type.IsInteger() && isSameCall(sym, "runtime.growslice") => newLen
+// TODO(thepudds): we probably need the new growsliceBuf and growsliceBufNoAlias here as well?
+(SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const(64|32)) _ _ _ _)))
+ && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
+ => newLen
+(SelectN [1] (StaticCall {sym} _ newLen:(Const(64|32)) _ _ _ _)) && v.Type.IsInteger()
+ && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
+ => newLen
// Collapse moving A -> B -> C into just A -> C.
// Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible.
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index fea126bd4d..dbbb7105af 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -30157,7 +30157,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
return true
}
// match: (SelectN [1] (StaticCall {sym} _ newLen:(Const64) _ _ _ _))
- // cond: v.Type.IsInteger() && isSameCall(sym, "runtime.growslice")
+ // cond: v.Type.IsInteger() && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
// result: newLen
for {
if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStaticCall || len(v_0.Args) != 6 {
@@ -30166,14 +30166,14 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
sym := auxToCall(v_0.Aux)
_ = v_0.Args[1]
newLen := v_0.Args[1]
- if newLen.Op != OpConst64 || !(v.Type.IsInteger() && isSameCall(sym, "runtime.growslice")) {
+ if newLen.Op != OpConst64 || !(v.Type.IsInteger() && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))) {
break
}
v.copyOf(newLen)
return true
}
// match: (SelectN [1] (StaticCall {sym} _ newLen:(Const32) _ _ _ _))
- // cond: v.Type.IsInteger() && isSameCall(sym, "runtime.growslice")
+ // cond: v.Type.IsInteger() && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
// result: newLen
for {
if auxIntToInt64(v.AuxInt) != 1 || v_0.Op != OpStaticCall || len(v_0.Args) != 6 {
@@ -30182,7 +30182,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
sym := auxToCall(v_0.Aux)
_ = v_0.Args[1]
newLen := v_0.Args[1]
- if newLen.Op != OpConst32 || !(v.Type.IsInteger() && isSameCall(sym, "runtime.growslice")) {
+ if newLen.Op != OpConst32 || !(v.Type.IsInteger() && (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))) {
break
}
v.copyOf(newLen)
@@ -30594,7 +30594,7 @@ func rewriteValuegeneric_OpSliceLen(v *Value) bool {
return true
}
// match: (SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const64) _ _ _ _)))
- // cond: isSameCall(sym, "runtime.growslice")
+ // cond: (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
// result: newLen
for {
if v_0.Op != OpSelectN || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -30607,14 +30607,14 @@ func rewriteValuegeneric_OpSliceLen(v *Value) bool {
sym := auxToCall(v_0_0.Aux)
_ = v_0_0.Args[1]
newLen := v_0_0.Args[1]
- if newLen.Op != OpConst64 || !(isSameCall(sym, "runtime.growslice")) {
+ if newLen.Op != OpConst64 || !(isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias")) {
break
}
v.copyOf(newLen)
return true
}
// match: (SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const32) _ _ _ _)))
- // cond: isSameCall(sym, "runtime.growslice")
+ // cond: (isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias"))
// result: newLen
for {
if v_0.Op != OpSelectN || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -30627,7 +30627,7 @@ func rewriteValuegeneric_OpSliceLen(v *Value) bool {
sym := auxToCall(v_0_0.Aux)
_ = v_0_0.Args[1]
newLen := v_0_0.Args[1]
- if newLen.Op != OpConst32 || !(isSameCall(sym, "runtime.growslice")) {
+ if newLen.Op != OpConst32 || !(isSameCall(sym, "runtime.growslice") || isSameCall(sym, "runtime.growsliceNoAlias")) {
break
}
v.copyOf(newLen)
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 830e013697..33fcf979c5 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -12,6 +12,7 @@ import (
"go/constant"
"html"
"internal/buildcfg"
+ "internal/goexperiment"
"internal/runtime/gc"
"os"
"path/filepath"
@@ -125,6 +126,8 @@ func InitConfig() {
ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded")
ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice")
ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf")
+ ir.Syms.GrowsliceBufNoAlias = typecheck.LookupRuntimeFunc("growsliceBufNoAlias")
+ ir.Syms.GrowsliceNoAlias = typecheck.LookupRuntimeFunc("growsliceNoAlias")
ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice")
ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan")
ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap")
@@ -4048,9 +4051,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
s.defvars[s.f.Entry.ID][memVar] = mem
info.usedStatic = true
}
- r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K))
+ fn := ir.Syms.GrowsliceBuf
+ if goexperiment.RuntimeFreegc && n.AppendNoAlias && !et.HasPointers() {
+ // The append is for a non-aliased slice where the runtime knows how to free
+ // the old logically dead backing store after growth.
+ // TODO(thepudds): for now, we only use the NoAlias version for element types
+ // without pointers while waiting on additional runtime support (CL 698515).
+ fn = ir.Syms.GrowsliceBufNoAlias
+ }
+ r = s.rtcall(fn, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K))
} else {
- r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr)
+ fn := ir.Syms.Growslice
+ if goexperiment.RuntimeFreegc && n.AppendNoAlias && !et.HasPointers() {
+ // The append is for a non-aliased slice where the runtime knows how to free
+ // the old logically dead backing store after growth.
+ // TODO(thepudds): for now, we only use the NoAlias version for element types
+ // without pointers while waiting on additional runtime support (CL 698515).
+ fn = ir.Syms.GrowsliceNoAlias
+ }
+ r = s.rtcall(fn, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr)
}
// Decompose output slice
diff --git a/src/cmd/compile/internal/test/free_test.go b/src/cmd/compile/internal/test/free_test.go
new file mode 100644
index 0000000000..061cc9a6e4
--- /dev/null
+++ b/src/cmd/compile/internal/test/free_test.go
@@ -0,0 +1,55 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import (
+ "internal/asan"
+ "internal/goexperiment"
+ "internal/msan"
+ "internal/race"
+ "testing"
+)
+
+func TestFreeAppendAllocations(t *testing.T) {
+ t.Run("slice-no-alias", func(t *testing.T) {
+ if !goexperiment.RuntimeFreegc {
+ t.Skip("skipping allocation test when runtime.freegc is disabled")
+ }
+ if race.Enabled || msan.Enabled || asan.Enabled {
+ // TODO(thepudds): we get 8 allocs for slice-no-alias instead of 1 with -race. This
+ // might be expected given some allocation optimizations are already disabled
+ // under race, but if not, we might need to update walk.
+ t.Skip("skipping allocation test under race detector and other sanitizers")
+ }
+
+ allocs := testing.AllocsPerRun(100, func() {
+ var s []int64
+ for i := range 100 {
+ s = append(s, int64(i))
+ }
+ _ = s
+ })
+ t.Logf("allocs: %v", allocs)
+ if allocs != 1 {
+ t.Errorf("allocs: %v, want 1", allocs)
+ }
+ })
+
+ t.Run("slice-aliased", func(t *testing.T) {
+ allocs := testing.AllocsPerRun(100, func() {
+ var s []int64
+ var alias []int64
+ for i := range 100 {
+ s = append(s, int64(i))
+ alias = s
+ }
+ _ = alias
+ })
+ t.Logf("allocs: %v", allocs)
+ if allocs < 2 {
+ t.Errorf("allocs: %v, want >= 2", allocs)
+ }
+ })
+}
diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go
index 35fbbb6b12..a7603c3e33 100644
--- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go
+++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go
@@ -197,6 +197,8 @@ func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer
func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer
func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any)
func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any)
+func growsliceBufNoAlias(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any)
+func growsliceNoAlias(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any)
func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64)
func panicunsafeslicelen()
func panicunsafeslicenilptr()
diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go
index 8a505073f7..955e65e598 100644
--- a/src/cmd/compile/internal/typecheck/builtin.go
+++ b/src/cmd/compile/internal/typecheck/builtin.go
@@ -162,6 +162,8 @@ var runtimeDecls = [...]struct {
{"makeslicecopy", funcTag, 125},
{"growslice", funcTag, 127},
{"growsliceBuf", funcTag, 128},
+ {"growsliceBufNoAlias", funcTag, 128},
+ {"growsliceNoAlias", funcTag, 127},
{"unsafeslicecheckptr", funcTag, 129},
{"panicunsafeslicelen", funcTag, 9},
{"panicunsafeslicenilptr", funcTag, 9},