aboutsummaryrefslogtreecommitdiff
path: root/src/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd')
-rw-r--r--src/cmd/compile/internal/ssa/pair.go112
1 files changed, 112 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/pair.go b/src/cmd/compile/internal/ssa/pair.go
index 83d7e476dd..7595fdb04c 100644
--- a/src/cmd/compile/internal/ssa/pair.go
+++ b/src/cmd/compile/internal/ssa/pair.go
@@ -7,6 +7,7 @@ package ssa
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/types"
+ "cmd/internal/obj"
"slices"
)
@@ -206,6 +207,117 @@ func pairLoads(f *Func) {
i++ // Skip y next time around the loop.
}
}
+
+ // Try to pair a load with a load from a subsequent block.
+ // Note that this is always safe to do if the memory arguments match.
+ // (But see the memory barrier case below.)
+ type nextBlockKey struct {
+ op Op
+ ptr ID
+ mem ID
+ auxInt int64
+ aux any
+ }
+ nextBlock := map[nextBlockKey]*Value{}
+ for _, b := range f.Blocks {
+ if memoryBarrierTest(b) {
+ // TODO: Do we really need to skip write barrier test blocks?
+ // type T struct {
+ // a *byte
+ // b int
+ // }
+ // func f(t *T) int {
+ // r := t.b
+ // t.a = nil
+ // return r
+ // }
+ // This would issue a single LDP for both the t.a and t.b fields,
+ // *before* we check the write barrier flag. (We load the t.a field
+ // to put it in the write barrier buffer.) Not sure if that is ok.
+ continue
+ }
+ // Find loads in the next block(s) that we can move to this one.
+ // TODO: could maybe look further than just one successor hop.
+ clear(nextBlock)
+ for _, e := range b.Succs {
+ if len(e.b.Preds) > 1 {
+ continue
+ }
+ for _, v := range e.b.Values {
+ info := pairableLoads[v.Op]
+ if info.width == 0 {
+ continue
+ }
+ if !offsetOk(v.Aux, v.AuxInt, info.width) {
+ continue // not advisable
+ }
+ nextBlock[nextBlockKey{op: v.Op, ptr: v.Args[0].ID, mem: v.Args[1].ID, auxInt: v.AuxInt, aux: v.Aux}] = v
+ }
+ }
+ if len(nextBlock) == 0 {
+ continue
+ }
+ // don't move too many loads. Each requires a register across a basic block boundary.
+ const maxMoved = 4
+ nMoved := 0
+ for i := len(b.Values) - 1; i >= 0 && nMoved < maxMoved; i-- {
+ x := b.Values[i]
+ info := pairableLoads[x.Op]
+ if info.width == 0 {
+ continue
+ }
+ if !offsetOk(x.Aux, x.AuxInt, info.width) {
+ continue // not advisable
+ }
+ key := nextBlockKey{op: x.Op, ptr: x.Args[0].ID, mem: x.Args[1].ID, auxInt: x.AuxInt + info.width, aux: x.Aux}
+ if y := nextBlock[key]; y != nil {
+ delete(nextBlock, key)
+
+ // Make the 2-register load.
+ load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
+
+ // Modify x to be (Select0 load).
+ x.reset(OpSelect0)
+ x.SetArgs1(load)
+ // Modify y to be (Copy (Select1 load)).
+ // Note: the Select* needs to live in the load's block, not y's block.
+ y.reset(OpCopy)
+ y.SetArgs1(b.NewValue1(y.Pos, OpSelect1, y.Type, load))
+ nMoved++
+ continue
+ }
+ key.auxInt = x.AuxInt - info.width
+ if y := nextBlock[key]; y != nil {
+ delete(nextBlock, key)
+
+ // Make the 2-register load.
+ load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(y.Type, x.Type), y.AuxInt, x.Aux, x.Args[0], x.Args[1])
+
+ // Modify x to be (Select1 load).
+ x.reset(OpSelect1)
+ x.SetArgs1(load)
+ // Modify y to be (Copy (Select0 load)).
+ y.reset(OpCopy)
+ y.SetArgs1(b.NewValue1(y.Pos, OpSelect0, y.Type, load))
+ nMoved++
+ continue
+ }
+ }
+ }
+}
+
+func memoryBarrierTest(b *Block) bool {
+ if b.Kind != BlockARM64NZW {
+ return false
+ }
+ c := b.Controls[0]
+ if c.Op != OpARM64MOVWUload {
+ return false
+ }
+ if globl, ok := c.Aux.(*obj.LSym); ok {
+ return globl.Name == "runtime.writeBarrier"
+ }
+ return false
}
func pairStores(f *Func) {