diff options
Diffstat (limited to 'src/cmd/compile')
| -rw-r--r-- | src/cmd/compile/internal/ssa/func_test.go | 5 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/regalloc.go | 73 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/regalloc_test.go | 59 |
3 files changed, 129 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/ssa/func_test.go b/src/cmd/compile/internal/ssa/func_test.go index 1a378d4a95..479186b34d 100644 --- a/src/cmd/compile/internal/ssa/func_test.go +++ b/src/cmd/compile/internal/ssa/func_test.go @@ -260,6 +260,11 @@ func Eq(cond, sub, alt string) ctrl { return ctrl{BlockAMD64EQ, cond, []string{sub, alt}} } +// Lt specifies a BlockAMD64LT. +func Lt(cond, yes, no string) ctrl { + return ctrl{BlockAMD64LT, cond, []string{yes, no}} +} + // bloc, ctrl, and valu are internal structures used by Bloc, Valu, Goto, // If, and Exit to help define blocks. diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index a0257f3064..dbcc93a7dd 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -2015,7 +2015,27 @@ func (s *regAllocState) regalloc(f *Func) { goto badloop } - // TODO: sort by distance, pick the closest ones? + // Look into target block, find Phi arguments that come from b. + phiArgs := regValLiveSet // reuse this space + phiArgs.clear() + for _, v := range b.Succs[0].b.Values { + if v.Op == OpPhi { + phiArgs.add(v.Args[b.Succs[0].i].ID) + } + } + + // Get mask of all registers that might be used soon in the destination. + // We don't want to kick values out of these registers, but we will + // kick out an unlikely-to-be-used value for a likely-to-be-used one. + var likelyUsedRegs regMask + for _, live := range s.live[b.ID] { + if live.dist < unlikelyDistance { + likelyUsedRegs |= s.values[live.ID].regs + } + } + // Promote values we're going to use soon in the destination to registers. + // Note that this iterates nearest-use first, as we sorted + // live lists by distance in computeLive. for _, live := range s.live[b.ID] { if live.dist >= unlikelyDistance { // Don't preload anything live after the loop. 
@@ -2023,14 +2043,41 @@ func (s *regAllocState) regalloc(f *Func) { } vid := live.ID vi := &s.values[vid] - if vi.regs != 0 { + v := s.orig[vid] + if phiArgs.contains(vid) { + // A phi argument needs its value in a regular register, + // as returned by compatRegs. Being in a fixed register + // (e.g. the zero register) or being easily + // rematerializeable isn't enough. + if vi.regs&s.compatRegs(v.Type) != 0 { + continue + } + } else { + if vi.regs != 0 { + continue + } + if vi.rematerializeable { + // TODO: maybe we should not skip rematerializeable + // values here. One rematerialization outside the loop + // is better than N in the loop. But rematerializations + // are cheap, and spilling another value may not be. + // And we don't want to materialize the zero register + // into a different register when it is just the + // argument to a store. + continue + } + } + if vi.rematerializeable && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm { continue } - if vi.rematerializeable { + // Registers we could load v into. + // Don't kick out other likely-used values. + m := s.compatRegs(v.Type) &^ likelyUsedRegs + if m == 0 { + // Too many likely-used values to give them all a register. continue } - v := s.orig[vid] - m := s.compatRegs(v.Type) &^ s.used + // Use desired register if available. outerloop: for _, e := range desired.entries { @@ -2047,9 +2094,8 @@ func (s *regAllocState) regalloc(f *Func) { if m&^desired.avoid != 0 { m &^= desired.avoid } - if m != 0 { - s.allocValToReg(v, m, false, b.Pos) - } + s.allocValToReg(v, m, false, b.Pos) + likelyUsedRegs |= s.values[v.ID].regs } } badloop: @@ -3129,6 +3175,17 @@ func (s *regAllocState) computeLive() { unfinishedBlocks = unfinishedBlocks[:n] } + // Sort live values in order of their nearest next use. + // Useful for promoting values to registers, nearest use first. 
+ for _, b := range f.Blocks { + slices.SortFunc(s.live[b.ID], func(a, b liveInfo) int { + if a.dist != b.dist { + return cmp.Compare(a.dist, b.dist) + } + return cmp.Compare(a.ID, b.ID) // for deterministic sorting + }) + } + s.computeDesired() if f.pass.debug > regDebug { diff --git a/src/cmd/compile/internal/ssa/regalloc_test.go b/src/cmd/compile/internal/ssa/regalloc_test.go index 12f5820f1f..18d42063fb 100644 --- a/src/cmd/compile/internal/ssa/regalloc_test.go +++ b/src/cmd/compile/internal/ssa/regalloc_test.go @@ -346,3 +346,62 @@ func TestRematerializeableRegCompatible(t *testing.T) { t.Errorf("Expects an Copy to be issued, but got: %+v", f.f) } } + +func TestPreload(t *testing.T) { + c := testConfig(t) + // amd64 has 13 general registers. We use 1 for ptr and 12 for x0-11. + // They all contain live values at the end of the entry block. + f := c.Fun("entry", + Bloc("entry", + Valu("ptr", OpArgIntReg, c.config.Types.Int8.PtrTo(), 0, &AuxNameOffset{Name: c.Temp(c.config.Types.Int8.PtrTo()), Offset: 0}), + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("x0", OpAMD64MOVBload, c.config.Types.Int8, 0, nil, "ptr", "mem"), + Valu("x1", OpAMD64MOVBload, c.config.Types.Int8, 1, nil, "ptr", "mem"), + Valu("x2", OpAMD64MOVBload, c.config.Types.Int8, 2, nil, "ptr", "mem"), + Valu("x3", OpAMD64MOVBload, c.config.Types.Int8, 3, nil, "ptr", "mem"), + Valu("x4", OpAMD64MOVBload, c.config.Types.Int8, 4, nil, "ptr", "mem"), + Valu("x5", OpAMD64MOVBload, c.config.Types.Int8, 5, nil, "ptr", "mem"), + Valu("x6", OpAMD64MOVBload, c.config.Types.Int8, 6, nil, "ptr", "mem"), + Valu("x7", OpAMD64MOVBload, c.config.Types.Int8, 7, nil, "ptr", "mem"), + Valu("x8", OpAMD64MOVBload, c.config.Types.Int8, 8, nil, "ptr", "mem"), + Valu("x9", OpAMD64MOVBload, c.config.Types.Int8, 9, nil, "ptr", "mem"), + Valu("x10", OpAMD64MOVBload, c.config.Types.Int8, 10, nil, "ptr", "mem"), + Valu("x11", OpAMD64MOVBload, c.config.Types.Int8, 11, nil, "ptr", "mem"), + Valu("init", OpAMD64MOVQconst, 
c.config.Types.Int64, 0, nil), + Goto("loopHead"), + ), + Bloc("loopHead", + Valu("phi", OpPhi, c.config.Types.Int64, 0, nil, "init", "next"), + Valu("test", OpAMD64CMPQconst, types.TypeFlags, 10, nil, "phi"), + Lt("test", "loopBody", "exit"), + ), + Bloc("loopBody", + Valu("next", OpAMD64ADDQconst, c.config.Types.Int64, 1, nil, "phi"), + Goto("loopHead"), + ), + Bloc("exit", + Valu("m0", OpAMD64MOVBstore, types.TypeMem, 0, nil, "ptr", "x0", "mem"), + Valu("m1", OpAMD64MOVBstore, types.TypeMem, 1, nil, "ptr", "x1", "m0"), + Valu("m2", OpAMD64MOVBstore, types.TypeMem, 2, nil, "ptr", "x2", "m1"), + Valu("m3", OpAMD64MOVBstore, types.TypeMem, 3, nil, "ptr", "x3", "m2"), + Valu("m4", OpAMD64MOVBstore, types.TypeMem, 4, nil, "ptr", "x4", "m3"), + Valu("m5", OpAMD64MOVBstore, types.TypeMem, 5, nil, "ptr", "x5", "m4"), + Valu("m6", OpAMD64MOVBstore, types.TypeMem, 6, nil, "ptr", "x6", "m5"), + Valu("m7", OpAMD64MOVBstore, types.TypeMem, 7, nil, "ptr", "x7", "m6"), + Valu("m8", OpAMD64MOVBstore, types.TypeMem, 8, nil, "ptr", "x8", "m7"), + Valu("m9", OpAMD64MOVBstore, types.TypeMem, 9, nil, "ptr", "x9", "m8"), + Valu("m10", OpAMD64MOVBstore, types.TypeMem, 10, nil, "ptr", "x10", "m9"), + Valu("m11", OpAMD64MOVBstore, types.TypeMem, 11, nil, "ptr", "x11", "m10"), + Ret("m11"), + ), + ) + f.f.Blocks[1].Likely = BranchLikely + regalloc(f.f) + checkFunc(f.f) + + v := f.values["phi"] + loc := f.f.RegAlloc[v.ID] + if _, ok := loc.(*Register); !ok { + t.Errorf("Expects to use a register for phi, but got: %s\n%v", loc, f.f) + } +} |
