about summary refs log tree commit diff
diff options
context:
space:
mode:
authorIlya Tocar <ilya.tocar@intel.com>2016-08-24 17:57:01 +0300
committerKeith Randall <khr@golang.org>2026-04-06 16:36:04 -0700
commitf9f351b130d10ea1623ac0423d3ca9e4b07e3502 (patch)
tree40b59ae4066b5a717669e8c7cd3f041e6b51a5e2
parent0d0799f055dcc9b3b41df74bee3fbe398ae2f0e7 (diff)
downloadgo-f9f351b130d10ea1623ac0423d3ca9e4b07e3502.tar.xz
cmd/compile: add loop invariant code motion
(I copied Ilya Tocar's CL 27656 and heavily modified it.) This adds an optimization that moves loop invariant computations out of the loop. For example: a:= ... for ... { b:= a + 15 // uses of b } Turns into a:= ... b:= a + 15 for ... { // uses of b } Change-Id: I36c8c7e2b3bc1c5e6b4b293bed3a76dc20d6c825 Reviewed-on: https://go-review.googlesource.com/c/go/+/697235 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
-rw-r--r--src/cmd/compile/internal/ssa/compile.go1
-rw-r--r--src/cmd/compile/internal/ssa/licm.go167
-rw-r--r--src/cmd/compile/internal/ssa/licm_test.go69
-rw-r--r--src/cmd/compile/internal/ssa/uses.go109
4 files changed, 346 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 8e02da746e..3209ff6ceb 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -500,6 +500,7 @@ var passes = [...]pass{
{name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true},
{name: "lowered deadcode", fn: deadcode, required: true},
{name: "checkLower", fn: checkLower, required: true},
+ {name: "loop invariant", fn: licm},
{name: "late phielim and copyelim", fn: copyelim},
{name: "tighten", fn: tighten, required: true}, // move values closer to their uses
{name: "merge conditional branches", fn: mergeConditionalBranches}, // generate conditional comparison instructions on ARM64 architecture
diff --git a/src/cmd/compile/internal/ssa/licm.go b/src/cmd/compile/internal/ssa/licm.go
new file mode 100644
index 0000000000..1cdb4b5186
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/licm.go
@@ -0,0 +1,167 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// We are looking for loops with following structure
+// (loop bodies may have control flow inside):
+//
+// +--------------+
+// | |
+// | preheader |
+// | |
+// +-------+------+
+// |
+// |
+// +-------v------+
+// | |
+// +------> header |
+// | | |
+// | +-------+------+
+// | |
+// | |
+// | +-------v------+
+// | | |
+// +------+ loop body |
+// | |
+// +--------------+
+//
+//
+// We consider all phis and memory operations as initial loop dependent set.
+// So loop independent values are all loop values,
+// minus transitive closure of initial loop dependent values.
+// We remove those values from their BBs and move them to preheader.
+
+func licm(f *Func) {
+ // See likelyadjust.go for details about loop info.
+ nest := loopnestfor(f)
+ if len(nest.loops) == 0 || nest.hasIrreducible {
+ return
+ }
+
+ uses := uses(f)
+ defer uses.free(f)
+
+ loopDependent := f.Cache.allocBoolSlice(f.NumValues())
+ defer f.Cache.freeBoolSlice(loopDependent)
+ queue := f.Cache.allocValueSlice(f.NumValues())
+ defer f.Cache.freeValueSlice(queue)
+ queue = queue[:0]
+
+ // Start with all values we can't move out of loops.
+ for _, b := range f.Blocks {
+ if loop := nest.b2l[b.ID]; loop == nil || !loop.isInner {
+ // Values outside any loop we don't care about.
+ // Values not in a leaf loop we can't handle.
+ continue
+ }
+ for _, v := range b.Values {
+ loopDep := false
+ if v.Op == OpPhi {
+ loopDep = true
+ } else if v.Type.IsMemory() {
+ // We can't move state-modifying code.
+ // (TODO: but maybe this is handled by memory Phis anyway?)
+ loopDep = true
+ } else if v.Type.IsFlags() || v.Type.IsTuple() && (v.Type.FieldType(0).IsFlags() || v.Type.FieldType(1).IsFlags()) {
+ // This is not required for correctness. It is just to
+ // keep the live range of flag values low.
+ loopDep = true
+ } else if opcodeTable[v.Op].nilCheck {
+ // NilCheck in case loop executes 0 times. (It has a memory arg anyway?)
+ loopDep = true
+ } else if v.MemoryArg() != nil {
+ // Because the state of memory might be different at the loop start. (Also handled by Phi?)
+ loopDep = true
+ } else if v.Type.IsPtr() {
+ // Can't move pointer arithmetic, as it may be guarded by conditionals
+ // and this could materialize a bad pointer across a safepoint.
+ loopDep = true
+ }
+ if loopDep {
+ loopDependent[v.ID] = true
+ queue = append(queue, v)
+ }
+ }
+ }
+
+ // If a value can't be moved out of a loop, neither can its users.
+ // The queue contains values which are loop dependent, but their users
+ // have not been marked as loop dependent yet.
+ for len(queue) > 0 {
+ v := queue[len(queue)-1]
+ queue = queue[:len(queue)-1]
+
+ for _, u := range uses.get(v) {
+ if loop := nest.b2l[u.Block.ID]; loop == nil || !loop.isInner {
+ continue // see above
+ }
+ if loopDependent[u.ID] {
+ continue
+ }
+ loopDependent[u.ID] = true
+ queue = append(queue, u)
+ }
+ }
+
+ // Anything not marked as loop-dependent can be moved out of its loop.
+ for _, b := range f.Blocks {
+ loop := nest.b2l[b.ID]
+ if loop == nil || !loop.isInner {
+ // loopDependent check is wrong for loops containing other loops,
+ // because then a value might have an argument computed inside
+ // a nested loop.
+ continue
+ }
+ if len(loop.header.Preds) != 2 {
+ continue // is never true?
+ }
+ anyMoved := false
+ for i, v := range b.Values {
+ if loopDependent[v.ID] {
+ continue
+ }
+ // Figure out where to move loop-independent values.
+ h := loop.header
+ var inIdx int
+ if int(h.Preds[0].b.ID) >= len(nest.b2l) || nest.b2l[h.Preds[0].b.ID] != loop {
+ inIdx = 0
+ } else {
+ inIdx = 1
+ }
+ dest := h.Preds[inIdx].b
+ if dest.Kind != BlockPlain {
+ outIdx := h.Preds[inIdx].i
+ // Introduce a new block between the loop
+ // header predecessor and the loop header itself.
+ mid := f.NewBlock(BlockPlain)
+ mid.Pos = dest.Pos
+ // Splice into graph.
+ mid.Preds = append(mid.Preds, Edge{dest, outIdx})
+ mid.Succs = append(mid.Succs, Edge{h, inIdx})
+ h.Preds[inIdx] = Edge{mid, 0}
+ dest.Succs[outIdx] = Edge{mid, 0}
+
+ dest = mid
+ }
+
+ b.Values[i] = nil
+ v.Block = dest
+ dest.Values = append(dest.Values, v)
+ anyMoved = true
+ }
+ if anyMoved {
+ // We just nil'd entries in b.Values above. Compact out the nils.
+ i := 0
+ for _, v := range b.Values {
+ if v == nil {
+ continue
+ }
+ b.Values[i] = v
+ i++
+ }
+ b.Values = b.Values[:i]
+ }
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/licm_test.go b/src/cmd/compile/internal/ssa/licm_test.go
new file mode 100644
index 0000000000..fc1e22ef9f
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/licm_test.go
@@ -0,0 +1,69 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+ "cmd/compile/internal/types"
+ "testing"
+)
+
+// TestLICM checks that loop-invariant values are hoisted into an
+// existing plain predecessor of the loop header. Here "b" and "sum"
+// depend only on entry-block values, so licm should move them from
+// "loop" into "entry".
+func TestLICM(t *testing.T) {
+ c := testConfig(t)
+ fun := c.Fun("entry",
+ Bloc("entry",
+ Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil),
+ Valu("a", OpConst64, c.config.Types.Int64, 14, nil),
+ Goto("loop")),
+ Bloc("loop",
+ // Loop-invariant: a constant and an add of entry values.
+ Valu("b", OpConst64, c.config.Types.Int64, 26, nil),
+ Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"),
+ // "load" has a memory argument, so it and its users must
+ // stay in the loop.
+ Valu("load", OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"),
+ Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil),
+ Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"),
+ If("bool", "loop", "exit")),
+ Bloc("exit",
+ Exit("mem")))
+
+ CheckFunc(fun.f)
+ licm(fun.f)
+ CheckFunc(fun.f)
+
+ // entry starts with 3 values (mem, sp, a); hoisting b and sum
+ // should bring it to 5.
+ b := fun.blocks["entry"]
+ if len(b.Values) != 5 {
+ // b,sum should have been moved from loop to entry
+ t.Errorf("loop invariant code wasn't lifted, but should have")
+ }
+}
+
+// TestLICMNewBlock is like TestLICM, except the loop header's
+// out-of-loop predecessor ("entry") ends in an If rather than a plain
+// branch, so licm must splice in a fresh BlockPlain preheader between
+// "entry" and "loop" and hoist "b" and "sum" into it.
+func TestLICMNewBlock(t *testing.T) {
+ c := testConfig(t)
+ fun := c.Fun("entry",
+ Bloc("entry",
+ Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+ Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil),
+ Valu("a", OpConst64, c.config.Types.Int64, 14, nil),
+ Valu("bool2", OpConstBool, c.config.Types.Bool, 0, nil),
+ If("bool2", "loop", "exit")),
+ Bloc("loop",
+ // Loop-invariant: a constant and an add of entry values.
+ Valu("b", OpConst64, c.config.Types.Int64, 26, nil),
+ Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"),
+ // "load" has a memory argument, so it and its users must
+ // stay in the loop.
+ Valu("load", OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"),
+ Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil),
+ Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"),
+ If("bool", "loop", "exit")),
+ Bloc("exit",
+ Exit("mem")))
+
+ CheckFunc(fun.f)
+ licm(fun.f)
+ CheckFunc(fun.f)
+
+ // After licm, entry's first successor should be the new preheader,
+ // holding exactly the two hoisted values.
+ b := fun.blocks["entry"].Succs[0].b
+ if len(b.Values) != 2 {
+ // b,sum should have been moved from loop to new block between entry & loop
+ t.Errorf("loop invariant code wasn't lifted, but should have")
+ }
+}
diff --git a/src/cmd/compile/internal/ssa/uses.go b/src/cmd/compile/internal/ssa/uses.go
new file mode 100644
index 0000000000..9dd4194d07
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/uses.go
@@ -0,0 +1,109 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// useInfo provides a map from a value to the users of that value.
+// We do not keep track of this data in the IR directly, as it is
+// expensive to keep updated. But sometimes we need to compute it
+// temporarily.
+//
+// A useInfo is only valid until the next modification of the IR.
+//
+// Note that block control uses are not reported. (TODO: add
+// if needed.)
+// Also, index of use is not reported. (TODO: add if needed.)
+//
+// We keep track of all uses in a single array, and use a
+// starts array to tell us where to find the sub-array for
+// each value.
+//
+// For a function with 10 values, we would have:
+//
+// starts[0] starts[1] starts[2] starts[9] len(uses)
+// | | | | |
+// v v v v v
+// +------------+------------+-...-+------------+
+// | uses of v0 | uses of v1 | ... | uses of v9 |
+// +------------+------------+-...-+------------+
+//
+// We can find all the uses of v by listing all entries
+// of uses between starts[v.ID] and starts[v.ID+1].
+type useInfo struct {
+ starts []int32 // starts[id] is the index into uses where value id's use list begins
+ uses []*Value // all uses, grouped contiguously by the ID of the value being used
+}
+
+// uses builds a useInfo for f: for every value, the list of values
+// that use it as an argument. The result is only valid until the
+// next modification of f.
+func uses(f *Func) useInfo {
+ // Write down number of uses of each value.
+ idx := f.Cache.allocInt32Slice(f.NumValues())
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ idx[v.ID] = v.Uses
+ }
+ }
+
+ // Compute cumulative sum of uses up to and
+ // including each value ID.
+ var cum int32
+ for vid, uses := range idx {
+ cum += uses
+ idx[vid] = cum
+ }
+
+ // Compute uses. Each value's region is filled backwards from its
+ // inclusive prefix sum, so after this pass idx holds start offsets.
+ uses := f.Cache.allocValueSlice(int(cum))
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ for _, a := range v.Args {
+ idx[a.ID]--
+ uses[idx[a.ID]] = v
+ }
+ }
+ }
+ for _, b := range f.Blocks {
+ for _, c := range b.ControlValues() {
+ // We don't track block control uses, but
+ // we have to decrement idx values here
+ // so that the accounting comes out right.
+ // Each value will have, at the start of its
+ // use list, a bunch of nils that represent
+ // the number of Block.Control uses.
+ idx[c.ID]--
+ }
+ }
+
+ // The loops above decremented each idx entry
+ // by the number of uses. It now contains
+ // the sum of uses up to, but not including,
+ // each value ID.
+ return useInfo{starts: idx, uses: uses}
+}
+
+// get returns a list of uses of v.
+// Every use in an argument slot is listed (e.g. for
+// w=(Add v v), w is listed twice in the uses of v).
+// Uses by Block.Controls are not reported.
+func (u useInfo) get(v *Value) []*Value {
+ i := u.starts[v.ID]
+ // The end of v's region is the start of the next value's region,
+ // or the end of the uses array for the largest value ID.
+ var j int32
+ if int(v.ID) < len(u.starts)-1 {
+ j = u.starts[v.ID+1]
+ } else {
+ j = int32(len(u.uses))
+ }
+ r := u.uses[i:j]
+ // skip nil entries from block control uses
+ for len(r) > 0 && r[0] == nil {
+ r = r[1:]
+ }
+ return r
+}
+
+// free returns u's backing slices to f's cache.
+// u must not be used after free is called.
+func (u useInfo) free(f *Func) {
+ f.Cache.freeInt32Slice(u.starts)
+ f.Cache.freeValueSlice(u.uses)
+}