Diffstat (limited to 'src')
-rw-r--r--   src/cmd/compile/internal/ssa/func_test.go      5
-rw-r--r--   src/cmd/compile/internal/ssa/regalloc.go       73
-rw-r--r--   src/cmd/compile/internal/ssa/regalloc_test.go  59
3 files changed, 129 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go
index 1a378d4a95..479186b34d 100644
--- a/src/cmd/compile/internal/ssa/func_test.go
+++ b/src/cmd/compile/internal/ssa/func_test.go
@@ -260,6 +260,11 @@ func Eq(cond, sub, alt string) ctrl {
return ctrl{BlockAMD64EQ, cond, []string{sub, alt}}
}
+// Lt specifies a BlockAMD64LT.
+func Lt(cond, yes, no string) ctrl {
+ return ctrl{BlockAMD64LT, cond, []string{yes, no}}
+}
+
// bloc, ctrl, and valu are internal structures used by Bloc, Valu, Goto,
// If, and Exit to help define blocks.
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index a0257f3064..dbcc93a7dd 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -2015,7 +2015,27 @@ func (s *regAllocState) regalloc(f *Func) {
goto badloop
}
- // TODO: sort by distance, pick the closest ones?
+ // Look into target block, find Phi arguments that come from b.
+ phiArgs := regValLiveSet // reuse this space
+ phiArgs.clear()
+ for _, v := range b.Succs[0].b.Values {
+ if v.Op == OpPhi {
+ phiArgs.add(v.Args[b.Succs[0].i].ID)
+ }
+ }
+
+ // Get mask of all registers that might be used soon in the destination.
+ // We don't want to kick values out of these registers, but we will
+ // kick out an unlikely-to-be-used value for a likely-to-be-used one.
+ var likelyUsedRegs regMask
+ for _, live := range s.live[b.ID] {
+ if live.dist < unlikelyDistance {
+ likelyUsedRegs |= s.values[live.ID].regs
+ }
+ }
+ // Promote values we're going to use soon in the destination to registers.
+ // Note that this iterates nearest-use first, as we sorted
+ // live lists by distance in computeLive.
for _, live := range s.live[b.ID] {
if live.dist >= unlikelyDistance {
// Don't preload anything live after the loop.
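
The likelyUsedRegs bookkeeping above is plain bit-mask arithmetic over the register set. As a rough standalone illustration (regMask, the register numbers, and the compat mask below are made-up stand-ins for illustration, not the compiler's actual definitions):

package main

import (
	"fmt"
	"math/bits"
)

// regMask: bit i set means register i is in the set.
type regMask uint64

func main() {
	// Registers 0 and 3 already hold values with a near next use
	// in the destination block; we don't want to kick those out.
	var likelyUsedRegs regMask
	likelyUsedRegs |= 1 << 0
	likelyUsedRegs |= 1 << 3

	// Registers compatible with the value we want to preload.
	compat := regMask(0b1111)

	// Candidate registers: compatible minus likely-used.
	m := compat &^ likelyUsedRegs
	if m == 0 {
		fmt.Println("too many likely-used values; skip the preload")
		return
	}
	fmt.Println("preload into register", bits.TrailingZeros64(uint64(m))) // register 1
}
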
@@ -2023,14 +2043,41 @@ func (s *regAllocState) regalloc(f *Func) {
}
vid := live.ID
vi := &s.values[vid]
- if vi.regs != 0 {
+ v := s.orig[vid]
+ if phiArgs.contains(vid) {
+ // A phi argument needs its value in a regular register,
+ // as returned by compatRegs. Being in a fixed register
+ // (e.g. the zero register) or being easily
+ // rematerializeable isn't enough.
+ if vi.regs&s.compatRegs(v.Type) != 0 {
+ continue
+ }
+ } else {
+ if vi.regs != 0 {
+ continue
+ }
+ if vi.rematerializeable {
+ // TODO: maybe we should not skip rematerializeable
+ // values here. One rematerialization outside the loop
+ // is better than N in the loop. But rematerializations
+ // are cheap, and spilling another value may not be.
+ // And we don't want to materialize the zero register
+ // into a different register when it is just the
+ // argument to a store.
+ continue
+ }
+ }
+ if vi.rematerializeable && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
continue
}
- if vi.rematerializeable {
+ // Registers we could load v into.
+ // Don't kick out other likely-used values.
+ m := s.compatRegs(v.Type) &^ likelyUsedRegs
+ if m == 0 {
+ // Too many likely-used values to give them all a register.
continue
}
- v := s.orig[vid]
- m := s.compatRegs(v.Type) &^ s.used
+
// Use desired register if available.
outerloop:
for _, e := range desired.entries {
@@ -2047,9 +2094,8 @@ func (s *regAllocState) regalloc(f *Func) {
if m&^desired.avoid != 0 {
m &^= desired.avoid
}
- if m != 0 {
- s.allocValToReg(v, m, false, b.Pos)
- }
+ s.allocValToReg(v, m, false, b.Pos)
+ likelyUsedRegs |= s.values[v.ID].regs
}
}
badloop:
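
Taken together, the hunk above amounts to a small predicate: a phi argument is preloaded unless it already sits in a compatible general register, while other values are skipped if they are in any register or are cheaply rematerializeable. A hypothetical, simplified distillation (shouldPreload and its parameters are illustrative only, and the wasm special case is omitted; this is not the compiler's API):

package main

import "fmt"

type regMask uint64

// shouldPreload reports whether a live value is worth loading into a
// register at loop entry, following the rules sketched above.
func shouldPreload(isPhiArg bool, inRegs, compat regMask, rematerializeable bool) bool {
	if isPhiArg {
		// A phi argument needs a regular, compatible register; sitting in a
		// fixed register or being rematerializeable isn't enough.
		return inRegs&compat == 0
	}
	// Other values: skip if already in some register or trivially recomputable.
	return inRegs == 0 && !rematerializeable
}

func main() {
	compat := regMask(0b1111)
	fmt.Println(shouldPreload(true, 1<<20, compat, false)) // true: only in a fixed register
	fmt.Println(shouldPreload(false, 0, compat, true))     // false: rematerializeable
	fmt.Println(shouldPreload(false, 0, compat, false))    // true: not in any register
}
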
@@ -3129,6 +3175,17 @@ func (s *regAllocState) computeLive() {
unfinishedBlocks = unfinishedBlocks[:n]
}
+ // Sort live values in order of their nearest next use.
+ // Useful for promoting values to registers, nearest use first.
+ for _, b := range f.Blocks {
+ slices.SortFunc(s.live[b.ID], func(a, b liveInfo) int {
+ if a.dist != b.dist {
+ return cmp.Compare(a.dist, b.dist)
+ }
+ return cmp.Compare(a.ID, b.ID) // for deterministic sorting
+ })
+ }
+
s.computeDesired()
if f.pass.debug > regDebug {
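
The computeLive change gives the preload loop its nearest-use-first iteration order via a deterministic sort. The pattern is standard-library slices.SortFunc with a two-level comparator; a standalone sketch (liveInfo here is a simplified stand-in for the regalloc type):

package main

import (
	"cmp"
	"fmt"
	"slices"
)

// liveInfo: ID is the value ID, dist the distance to its next use.
type liveInfo struct {
	ID   int32
	dist int32
}

func main() {
	live := []liveInfo{{ID: 7, dist: 12}, {ID: 3, dist: 2}, {ID: 9, dist: 2}, {ID: 1, dist: 40}}

	// Nearest next use first; ties broken by ID for a deterministic order.
	slices.SortFunc(live, func(a, b liveInfo) int {
		if a.dist != b.dist {
			return cmp.Compare(a.dist, b.dist)
		}
		return cmp.Compare(a.ID, b.ID)
	})

	fmt.Println(live) // [{3 2} {9 2} {7 12} {1 40}]
}
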
diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go
index 12f5820f1f..18d42063fb 100644
--- a/src/cmd/compile/internal/ssa/regalloc_test.go
+++ b/src/cmd/compile/internal/ssa/regalloc_test.go
@@ -346,3 +346,62 @@ func TestRematerializeableRegCompatible(t *testing.T) {
t.Errorf("Expects an Copy to be issued, but got: %+v", f.f)
}
}
+
+func TestPreload(t *testing.T) {
+ c := testConfig(t)
+ // amd64 has 13 general registers. We use 1 for ptr and 12 for x0-11.
+ // They all contain live values at the end of the entry block.
+ f := c.Fun("entry",
+ Bloc("entry",
+ Valu("ptr", OpArgIntReg, c.config.Types.Int8.PtrTo(), 0, &AuxNameOffset{Name: c.Temp(c.config.Types.Int8.PtrTo()), Offset: 0}),
+ Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+ Valu("x0", OpAMD64MOVBload, c.config.Types.Int8, 0, nil, "ptr", "mem"),
+ Valu("x1", OpAMD64MOVBload, c.config.Types.Int8, 1, nil, "ptr", "mem"),
+ Valu("x2", OpAMD64MOVBload, c.config.Types.Int8, 2, nil, "ptr", "mem"),
+ Valu("x3", OpAMD64MOVBload, c.config.Types.Int8, 3, nil, "ptr", "mem"),
+ Valu("x4", OpAMD64MOVBload, c.config.Types.Int8, 4, nil, "ptr", "mem"),
+ Valu("x5", OpAMD64MOVBload, c.config.Types.Int8, 5, nil, "ptr", "mem"),
+ Valu("x6", OpAMD64MOVBload, c.config.Types.Int8, 6, nil, "ptr", "mem"),
+ Valu("x7", OpAMD64MOVBload, c.config.Types.Int8, 7, nil, "ptr", "mem"),
+ Valu("x8", OpAMD64MOVBload, c.config.Types.Int8, 8, nil, "ptr", "mem"),
+ Valu("x9", OpAMD64MOVBload, c.config.Types.Int8, 9, nil, "ptr", "mem"),
+ Valu("x10", OpAMD64MOVBload, c.config.Types.Int8, 10, nil, "ptr", "mem"),
+ Valu("x11", OpAMD64MOVBload, c.config.Types.Int8, 11, nil, "ptr", "mem"),
+ Valu("init", OpAMD64MOVQconst, c.config.Types.Int64, 0, nil),
+ Goto("loopHead"),
+ ),
+ Bloc("loopHead",
+ Valu("phi", OpPhi, c.config.Types.Int64, 0, nil, "init", "next"),
+ Valu("test", OpAMD64CMPQconst, types.TypeFlags, 10, nil, "phi"),
+ Lt("test", "loopBody", "exit"),
+ ),
+ Bloc("loopBody",
+ Valu("next", OpAMD64ADDQconst, c.config.Types.Int64, 1, nil, "phi"),
+ Goto("loopHead"),
+ ),
+ Bloc("exit",
+ Valu("m0", OpAMD64MOVBstore, types.TypeMem, 0, nil, "ptr", "x0", "mem"),
+ Valu("m1", OpAMD64MOVBstore, types.TypeMem, 1, nil, "ptr", "x1", "m0"),
+ Valu("m2", OpAMD64MOVBstore, types.TypeMem, 2, nil, "ptr", "x2", "m1"),
+ Valu("m3", OpAMD64MOVBstore, types.TypeMem, 3, nil, "ptr", "x3", "m2"),
+ Valu("m4", OpAMD64MOVBstore, types.TypeMem, 4, nil, "ptr", "x4", "m3"),
+ Valu("m5", OpAMD64MOVBstore, types.TypeMem, 5, nil, "ptr", "x5", "m4"),
+ Valu("m6", OpAMD64MOVBstore, types.TypeMem, 6, nil, "ptr", "x6", "m5"),
+ Valu("m7", OpAMD64MOVBstore, types.TypeMem, 7, nil, "ptr", "x7", "m6"),
+ Valu("m8", OpAMD64MOVBstore, types.TypeMem, 8, nil, "ptr", "x8", "m7"),
+ Valu("m9", OpAMD64MOVBstore, types.TypeMem, 9, nil, "ptr", "x9", "m8"),
+ Valu("m10", OpAMD64MOVBstore, types.TypeMem, 10, nil, "ptr", "x10", "m9"),
+ Valu("m11", OpAMD64MOVBstore, types.TypeMem, 11, nil, "ptr", "x11", "m10"),
+ Ret("m11"),
+ ),
+ )
+ f.f.Blocks[1].Likely = BranchLikely
+ regalloc(f.f)
+ checkFunc(f.f)
+
+ v := f.values["phi"]
+ loc := f.f.RegAlloc[v.ID]
+ if _, ok := loc.(*Register); !ok {
+ t.Errorf("Expects to use a register for phi, but got: %s\n%v", loc, f.f)
+ }
+}