diff options
| author | Ilya Tocar <ilya.tocar@intel.com> | 2016-08-24 17:57:01 +0300 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2026-04-06 16:36:04 -0700 |
| commit | f9f351b130d10ea1623ac0423d3ca9e4b07e3502 (patch) | |
| tree | 40b59ae4066b5a717669e8c7cd3f041e6b51a5e2 /src/cmd | |
| parent | 0d0799f055dcc9b3b41df74bee3fbe398ae2f0e7 (diff) | |
| download | go-f9f351b130d10ea1623ac0423d3ca9e4b07e3502.tar.xz | |
cmd/compile: add loop invariant code motion
(I copied Ilya Tocar's CL 27656 and heavily modified it.)
This adds an optimization that moves loop invariant computations
out of the loop. For example:
a := ...
for ... {
b := a + 15
// uses of b
}
Turns into
a := ...
b := a + 15
for ... {
// uses of b
}
Change-Id: I36c8c7e2b3bc1c5e6b4b293bed3a76dc20d6c825
Reviewed-on: https://go-review.googlesource.com/c/go/+/697235
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/ssa/compile.go | 1 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/licm.go | 167 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/licm_test.go | 69 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/uses.go | 109 |
4 files changed, 346 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 8e02da746e..3209ff6ceb 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -500,6 +500,7 @@ var passes = [...]pass{ {name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true}, {name: "lowered deadcode", fn: deadcode, required: true}, {name: "checkLower", fn: checkLower, required: true}, + {name: "loop invariant", fn: licm}, {name: "late phielim and copyelim", fn: copyelim}, {name: "tighten", fn: tighten, required: true}, // move values closer to their uses {name: "merge conditional branches", fn: mergeConditionalBranches}, // generate conditional comparison instructions on ARM64 architecture diff --git a/src/cmd/compile/internal/ssa/licm.go b/src/cmd/compile/internal/ssa/licm.go new file mode 100644 index 0000000000..1cdb4b5186 --- /dev/null +++ b/src/cmd/compile/internal/ssa/licm.go @@ -0,0 +1,167 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// We are looking for loops with following structure +// (loop bodies may have control flow inside): +// +// +--------------+ +// | | +// | preheader | +// | | +// +-------+------+ +// | +// | +// +-------v------+ +// | | +// +------> header | +// | | | +// | +-------+------+ +// | | +// | | +// | +-------v------+ +// | | | +// +------+ loop body | +// | | +// +--------------+ +// +// +// We consider all phis and memory operations as initial loop dependent set. +// So loop independent values are all loop values, +// minus transitive closure of initial loop dependent values. +// We remove those values from their BBs and move them to preheader. + +func licm(f *Func) { + // See likelyadjust.go for details about loop info. 
+ nest := loopnestfor(f) + if len(nest.loops) == 0 || nest.hasIrreducible { + return + } + + uses := uses(f) + defer uses.free(f) + + loopDependent := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(loopDependent) + queue := f.Cache.allocValueSlice(f.NumValues()) + defer f.Cache.freeValueSlice(queue) + queue = queue[:0] + + // Start with all values we can't move out of loops. + for _, b := range f.Blocks { + if loop := nest.b2l[b.ID]; loop == nil || !loop.isInner { + // Values outside any loop we don't care about. + // Values not in a leaf loop we can't handle. + continue + } + for _, v := range b.Values { + loopDep := false + if v.Op == OpPhi { + loopDep = true + } else if v.Type.IsMemory() { + // We can't move state-modifying code. + // (TODO: but maybe this is handled by memory Phis anyway?) + loopDep = true + } else if v.Type.IsFlags() || v.Type.IsTuple() && (v.Type.FieldType(0).IsFlags() || v.Type.FieldType(1).IsFlags()) { + // This is not required for correctness. It is just to + // keep the live range of flag values low. + loopDep = true + } else if opcodeTable[v.Op].nilCheck { + // NilCheck in case loop executes 0 times. (It has a memory arg anyway?) + loopDep = true + } else if v.MemoryArg() != nil { + // Because the state of memory might be different at the loop start. (Also handled by Phi?) + loopDep = true + } else if v.Type.IsPtr() { + // Can't move pointer arithmetic, as it may be guarded by conditionals + // and this could materialize a bad pointer across a safepoint. + loopDep = true + } + if loopDep { + loopDependent[v.ID] = true + queue = append(queue, v) + } + } + } + + // If a value can't be moved out of a loop, neither can its users. + // The queue contains values which are loop dependent, but their users + // have not been marked as loop dependent yet. 
+ for len(queue) > 0 { + v := queue[len(queue)-1] + queue = queue[:len(queue)-1] + + for _, u := range uses.get(v) { + if loop := nest.b2l[u.Block.ID]; loop == nil || !loop.isInner { + continue // see above + } + if loopDependent[u.ID] { + continue + } + loopDependent[u.ID] = true + queue = append(queue, u) + } + } + + // Anything not marked as loop-dependent can be moved out of its loop. + for _, b := range f.Blocks { + loop := nest.b2l[b.ID] + if loop == nil || !loop.isInner { + // loopDependent check is wrong for loops containing other loops, + // because then a value might have an argument computed inside + // a nested loop. + continue + } + if len(loop.header.Preds) != 2 { + continue // is never true? + } + anyMoved := false + for i, v := range b.Values { + if loopDependent[v.ID] { + continue + } + // Figure out where to move loop-independent values. + h := loop.header + var inIdx int + if int(h.Preds[0].b.ID) >= len(nest.b2l) || nest.b2l[h.Preds[0].b.ID] != loop { + inIdx = 0 + } else { + inIdx = 1 + } + dest := h.Preds[inIdx].b + if dest.Kind != BlockPlain { + outIdx := h.Preds[inIdx].i + // Introduce a new block between the loop + // header predecessor and the loop header itself. + mid := f.NewBlock(BlockPlain) + mid.Pos = dest.Pos + // Splice into graph. + mid.Preds = append(mid.Preds, Edge{dest, outIdx}) + mid.Succs = append(mid.Succs, Edge{h, inIdx}) + h.Preds[inIdx] = Edge{mid, 0} + dest.Succs[outIdx] = Edge{mid, 0} + + dest = mid + } + + b.Values[i] = nil + v.Block = dest + dest.Values = append(dest.Values, v) + anyMoved = true + } + if anyMoved { + // We just nil'd entries in b.Values above. Compact out the nils. 
+ i := 0 + for _, v := range b.Values { + if v == nil { + continue + } + b.Values[i] = v + i++ + } + b.Values = b.Values[:i] + } + } +} diff --git a/src/cmd/compile/internal/ssa/licm_test.go b/src/cmd/compile/internal/ssa/licm_test.go new file mode 100644 index 0000000000..fc1e22ef9f --- /dev/null +++ b/src/cmd/compile/internal/ssa/licm_test.go @@ -0,0 +1,69 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "cmd/compile/internal/types" + "testing" +) + +func TestLICM(t *testing.T) { + c := testConfig(t) + fun := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil), + Valu("a", OpConst64, c.config.Types.Int64, 14, nil), + Goto("loop")), + Bloc("loop", + Valu("b", OpConst64, c.config.Types.Int64, 26, nil), + Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"), + Valu("load", OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"), + Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil), + Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"), + If("bool", "loop", "exit")), + Bloc("exit", + Exit("mem"))) + + CheckFunc(fun.f) + licm(fun.f) + CheckFunc(fun.f) + + b := fun.blocks["entry"] + if len(b.Values) != 5 { + // b,sum should have been moved from loop to entry + t.Errorf("loop invariant code wasn't lifted, but should have") + } +} + +func TestLICMNewBlock(t *testing.T) { + c := testConfig(t) + fun := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil), + Valu("a", OpConst64, c.config.Types.Int64, 14, nil), + Valu("bool2", OpConstBool, c.config.Types.Bool, 0, nil), + If("bool2", "loop", "exit")), + Bloc("loop", + Valu("b", OpConst64, c.config.Types.Int64, 26, nil), + Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"), + Valu("load", 
OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"), + Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil), + Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"), + If("bool", "loop", "exit")), + Bloc("exit", + Exit("mem"))) + + CheckFunc(fun.f) + licm(fun.f) + CheckFunc(fun.f) + + b := fun.blocks["entry"].Succs[0].b + if len(b.Values) != 2 { + // b,sum should have been moved from loop to new block between entry & loop + t.Errorf("loop invariant code wasn't lifted, but should have") + } +} diff --git a/src/cmd/compile/internal/ssa/uses.go b/src/cmd/compile/internal/ssa/uses.go new file mode 100644 index 0000000000..9dd4194d07 --- /dev/null +++ b/src/cmd/compile/internal/ssa/uses.go @@ -0,0 +1,109 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// useInfo provides a map from a value to the users of that value. +// We do not keep track of this data in the IR directly, as it is +// expensive to keep updated. But sometimes we need to compute it +// temporarily. +// +// A useInfo is only valid until the next modification of the IR. +// +// Note that block control uses are not reported. (TODO: add +// if needed.) +// Also, index of use is not reported. (TODO: add if needed.) +// +// We keep track of all uses in a single array, and use a +// starts array to tell us where to find the sub-array for +// each value. +// +// For a function with 10 values, we would have: +// +// starts[0] starts[1] starts[2] starts[9] len(uses) +// | | | | | +// v v v v v +// +------------+------------+-...-+------------+ +// | uses of v0 | uses of v1 | ... | uses of v9 | +// +------------+------------+-...-+------------+ +// +// We can find all the uses of v by listing all entries +// of uses between starts[v.ID] and starts[v.ID+1]. +type useInfo struct { + starts []int32 + uses []*Value +} + +// build useInfo for a function. 
Result only valid until +// the next modification of f. +func uses(f *Func) useInfo { + // Write down number of uses of each value. + idx := f.Cache.allocInt32Slice(f.NumValues()) + for _, b := range f.Blocks { + for _, v := range b.Values { + idx[v.ID] = v.Uses + } + } + + // Compute cumulative sum of uses up to and + // including each value ID. + var cum int32 + for vid, uses := range idx { + cum += uses + idx[vid] = cum + } + + // Compute uses. + uses := f.Cache.allocValueSlice(int(cum)) + for _, b := range f.Blocks { + for _, v := range b.Values { + for _, a := range v.Args { + idx[a.ID]-- + uses[idx[a.ID]] = v + } + } + } + for _, b := range f.Blocks { + for _, c := range b.ControlValues() { + // We don't track block control uses, but + // we have to decrement idx values here + // so that the accounting comes out right. + // Each value will have, at the start of its + // use list, a bunch of nils that represent + // the number of Block.Control uses. + idx[c.ID]-- + } + } + + // The loop above decremented each idx entry + // by the number of uses. It now contains + // the sum of uses up to, but not including, + // each value ID. + return useInfo{starts: idx, uses: uses} +} + +// get returns a list of uses of v. +// Every use in an argument slot is listed (e.g. for +// w=(Add v v), w is listed twice in the uses of v). +// Uses by Block.Controls are not reported. +func (u useInfo) get(v *Value) []*Value { + i := u.starts[v.ID] + var j int32 + if int(v.ID) < len(u.starts)-1 { + j = u.starts[v.ID+1] + } else { + j = int32(len(u.uses)) + } + r := u.uses[i:j] + // skip nil entries from block control uses + for len(r) > 0 && r[0] == nil { + r = r[1:] + } + return r +} + +func (u useInfo) free(f *Func) { + f.Cache.freeInt32Slice(u.starts) + f.Cache.freeValueSlice(u.uses) +} |
