diff options
| author | Ilya Tocar <ilya.tocar@intel.com> | 2016-08-24 17:57:01 +0300 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2026-04-06 16:36:04 -0700 |
| commit | f9f351b130d10ea1623ac0423d3ca9e4b07e3502 (patch) | |
| tree | 40b59ae4066b5a717669e8c7cd3f041e6b51a5e2 /src/cmd | |
| parent | 0d0799f055dcc9b3b41df74bee3fbe398ae2f0e7 (diff) | |
| download | go-f9f351b130d10ea1623ac0423d3ca9e4b07e3502.tar.xz | |
cmd/compile: add loop invariant code motion
(I copied Ilya Tocar's CL 27656 and heavily modified it.)
This adds an optimization that moves loop invariant computations
out of the loop. For example:
a := ...
for ... {
b := a + 15
// uses of b
}
Turns into
a := ...
b := a + 15
for ... {
// uses of b
}
Change-Id: I36c8c7e2b3bc1c5e6b4b293bed3a76dc20d6c825
Reviewed-on: https://go-review.googlesource.com/c/go/+/697235
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/ssa/compile.go | 1 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/licm.go | 167 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/licm_test.go | 69 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/uses.go | 109 |
4 files changed, 346 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 8e02da746e..3209ff6ceb 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -500,6 +500,7 @@ var passes = [...]pass{ {name: "tighten tuple selectors", fn: tightenTupleSelectors, required: true}, {name: "lowered deadcode", fn: deadcode, required: true}, {name: "checkLower", fn: checkLower, required: true}, + {name: "loop invariant", fn: licm}, {name: "late phielim and copyelim", fn: copyelim}, {name: "tighten", fn: tighten, required: true}, // move values closer to their uses {name: "merge conditional branches", fn: mergeConditionalBranches}, // generate conditional comparison instructions on ARM64 architecture diff --git a/src/cmd/compile/internal/ssa/licm.go b/src/cmd/compile/internal/ssa/licm.go new file mode 100644 index 0000000000..1cdb4b5186 --- /dev/null +++ b/src/cmd/compile/internal/ssa/licm.go @@ -0,0 +1,167 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// We are looking for loops with following structure +// (loop bodies may have control flow inside): +// +// +--------------+ +// | | +// | preheader | +// | | +// +-------+------+ +// | +// | +// +-------v------+ +// | | +// +------> header | +// | | | +// | +-------+------+ +// | | +// | | +// | +-------v------+ +// | | | +// +------+ loop body | +// | | +// +--------------+ +// +// +// We consider all phis and memory operations as initial loop dependent set. +// So loop independent values are all loop values, +// minus transitive closure of initial loop dependent values. +// We remove those values from their BBs and move them to preheader. + +func licm(f *Func) { + // See likelyadjust.go for details about loop info. 
+ nest := loopnestfor(f) + if len(nest.loops) == 0 || nest.hasIrreducible { + return + } + + uses := uses(f) + defer uses.free(f) + + loopDependent := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(loopDependent) + queue := f.Cache.allocValueSlice(f.NumValues()) + defer f.Cache.freeValueSlice(queue) + queue = queue[:0] + + // Start with all values we can't move out of loops. + for _, b := range f.Blocks { + if loop := nest.b2l[b.ID]; loop == nil || !loop.isInner { + // Values outside any loop we don't care about. + // Values not in a leaf loop we can't handle. + continue + } + for _, v := range b.Values { + loopDep := false + if v.Op == OpPhi { + loopDep = true + } else if v.Type.IsMemory() { + // We can't move state-modifying code. + // (TODO: but maybe this is handled by memory Phis anyway?) + loopDep = true + } else if v.Type.IsFlags() || v.Type.IsTuple() && (v.Type.FieldType(0).IsFlags() || v.Type.FieldType(1).IsFlags()) { + // This is not required for correctness. It is just to + // keep the live range of flag values low. + loopDep = true + } else if opcodeTable[v.Op].nilCheck { + // NilCheck in case loop executes 0 times. (It has a memory arg anyway?) + loopDep = true + } else if v.MemoryArg() != nil { + // Because the state of memory might be different at the loop start. (Also handled by Phi?) + loopDep = true + } else if v.Type.IsPtr() { + // Can't move pointer arithmetic, as it may be guarded by conditionals + // and this could materialize a bad pointer across a safepoint. + loopDep = true + } + if loopDep { + loopDependent[v.ID] = true + queue = append(queue, v) + } + } + } + + // If a value can't be moved out of a loop, neither can its users. + // The queue contains values which are loop dependent, but their users + // have not been marked as loop dependent yet. 
+ for len(queue) > 0 { + v := queue[len(queue)-1] + queue = queue[:len(queue)-1] + + for _, u := range uses.get(v) { + if loop := nest.b2l[u.Block.ID]; loop == nil || !loop.isInner { + continue // see above + } + if loopDependent[u.ID] { + continue + } + loopDependent[u.ID] = true + queue = append(queue, u) + } + } + + // Anything not marked as loop-dependent can be moved out of its loop. + for _, b := range f.Blocks { + loop := nest.b2l[b.ID] + if loop == nil || !loop.isInner { + // loopDependent check is wrong for loops containing other loops, + // because then a value might have an argument computed inside + // a nested loop. + continue + } + if len(loop.header.Preds) != 2 { + continue // is never true? + } + anyMoved := false + for i, v := range b.Values { + if loopDependent[v.ID] { + continue + } + // Figure out where to move loop-independent values. + h := loop.header + var inIdx int + if int(h.Preds[0].b.ID) >= len(nest.b2l) || nest.b2l[h.Preds[0].b.ID] != loop { + inIdx = 0 + } else { + inIdx = 1 + } + dest := h.Preds[inIdx].b + if dest.Kind != BlockPlain { + outIdx := h.Preds[inIdx].i + // Introduce a new block between the loop + // header predecessor and the loop header itself. + mid := f.NewBlock(BlockPlain) + mid.Pos = dest.Pos + // Splice into graph. + mid.Preds = append(mid.Preds, Edge{dest, outIdx}) + mid.Succs = append(mid.Succs, Edge{h, inIdx}) + h.Preds[inIdx] = Edge{mid, 0} + dest.Succs[outIdx] = Edge{mid, 0} + + dest = mid + } + + b.Values[i] = nil + v.Block = dest + dest.Values = append(dest.Values, v) + anyMoved = true + } + if anyMoved { + // We just nil'd entries in b.Values above. Compact out the nils. 
+ i := 0 + for _, v := range b.Values { + if v == nil { + continue + } + b.Values[i] = v + i++ + } + b.Values = b.Values[:i] + } + } +} diff --git a/src/cmd/compile/internal/ssa/licm_test.go b/src/cmd/compile/internal/ssa/licm_test.go new file mode 100644 index 0000000000..fc1e22ef9f --- /dev/null +++ b/src/cmd/compile/internal/ssa/licm_test.go @@ -0,0 +1,69 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "cmd/compile/internal/types" + "testing" +) + +func TestLICM(t *testing.T) { + c := testConfig(t) + fun := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil), + Valu("a", OpConst64, c.config.Types.Int64, 14, nil), + Goto("loop")), + Bloc("loop", + Valu("b", OpConst64, c.config.Types.Int64, 26, nil), + Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"), + Valu("load", OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"), + Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil), + Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"), + If("bool", "loop", "exit")), + Bloc("exit", + Exit("mem"))) + + CheckFunc(fun.f) + licm(fun.f) + CheckFunc(fun.f) + + b := fun.blocks["entry"] + if len(b.Values) != 5 { + // b,sum should have been moved from loop to entry + t.Errorf("loop invariant code wasn't lifted, but should have") + } +} + +func TestLICMNewBlock(t *testing.T) { + c := testConfig(t) + fun := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("sp", OpSP, c.config.Types.Uintptr, 0, nil), + Valu("a", OpConst64, c.config.Types.Int64, 14, nil), + Valu("bool2", OpConstBool, c.config.Types.Bool, 0, nil), + If("bool2", "loop", "exit")), + Bloc("loop", + Valu("b", OpConst64, c.config.Types.Int64, 26, nil), + Valu("sum", OpAdd64, c.config.Types.Int64, 0, nil, "a", "b"), + Valu("load", 
OpLoad, c.config.Types.BytePtr, 0, nil, "sp", "mem"), + Valu("nilptr", OpConstNil, c.config.Types.BytePtr, 0, nil), + Valu("bool", OpNeqPtr, c.config.Types.Bool, 0, nil, "load", "nilptr"), + If("bool", "loop", "exit")), + Bloc("exit", + Exit("mem"))) + + CheckFunc(fun.f) + licm(fun.f) + CheckFunc(fun.f) + + b := fun.blocks["entry"].Succs[0].b + if len(b.Values) != 2 { + // b,sum should have been moved from loop to new block between entry & loop + t.Errorf("loop invariant code wasn't lifted, but should have") + } +} diff --git a/src/cmd/compile/internal/ssa/uses.go b/src/cmd/compile/internal/ssa/uses.go new file mode 100644 index 0000000000..9dd4194d07 --- /dev/null +++ b/src/cmd/compile/internal/ssa/uses.go @@ -0,0 +1,109 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// useInfo provides a map from a value to the users of that value. +// We do not keep track of this data in the IR directly, as it is +// expensive to keep updated. But sometimes we need to compute it +// temporarily. +// +// A useInfo is only valid until the next modification of the IR. +// +// Note that block control uses are not reported. (TODO: add +// if needed.) +// Also, index of use is not reported. (TODO: add if needed.) +// +// We keep track of all uses in a single array, and use a +// starts array to tell us where to find the sub-array for +// each value. +// +// For a function with 10 values, we would have: +// +// starts[0] starts[1] starts[2] starts[9] len(uses) +// | | | | | +// v v v v v +// +------------+------------+-...-+------------+ +// | uses of v0 | uses of v1 | ... | uses of v9 | +// +------------+------------+-...-+------------+ +// +// We can find all the uses of v by listing all entries +// of uses between starts[v.ID] and starts[v.ID+1]. +type useInfo struct { + starts []int32 + uses []*Value +} + +// build useInfo for a function. 
Result only valid until +// the next modification of f. +func uses(f *Func) useInfo { + // Write down number of uses of each value. + idx := f.Cache.allocInt32Slice(f.NumValues()) + for _, b := range f.Blocks { + for _, v := range b.Values { + idx[v.ID] = v.Uses + } + } + + // Compute cumulative sum of uses up to and + // including each value ID. + var cum int32 + for vid, uses := range idx { + cum += uses + idx[vid] = cum + } + + // Compute uses. + uses := f.Cache.allocValueSlice(int(cum)) + for _, b := range f.Blocks { + for _, v := range b.Values { + for _, a := range v.Args { + idx[a.ID]-- + uses[idx[a.ID]] = v + } + } + } + for _, b := range f.Blocks { + for _, c := range b.ControlValues() { + // We don't track block control uses, but + // we have to decrement idx values here + // so that the accounting comes out right. + // Each value will have, at the start of its + // use list, a bunch of nils that represent + // the number of Block.Control uses. + idx[c.ID]-- + } + } + + // The loop above decremented each idx entry + // by the number of uses. It now contains + // the sum of uses up to, but not including, + // each value ID. + return useInfo{starts: idx, uses: uses} +} + +// get returns a list of uses of v. +// Every use in an argument slot is listed (e.g. for +// w=(Add v v), w is listed twice in the uses of v). +// Uses by Block.Controls are not reported. +func (u useInfo) get(v *Value) []*Value { + i := u.starts[v.ID] + var j int32 + if int(v.ID) < len(u.starts)-1 { + j = u.starts[v.ID+1] + } else { + j = int32(len(u.uses)) + } + r := u.uses[i:j] + // skip nil entries from block control uses + for len(r) > 0 && r[0] == nil { + r = r[1:] + } + return r +} + +func (u useInfo) free(f *Func) { + f.Cache.freeInt32Slice(u.starts) + f.Cache.freeValueSlice(u.uses) +} |
