diff options
| author | Keith Randall <khr@golang.org> | 2026-01-30 14:16:40 -0800 |
|---|---|---|
| committer | Keith Randall <khr@golang.org> | 2026-03-20 13:51:47 -0700 |
| commit | c8df1410d50f69b50eb5e643d15b6a3aab0ada06 (patch) | |
| tree | f6b3ab8d0c87fa8d7875eb3fca15b3b769a77a01 /src/cmd | |
| parent | 51abbb12c478ad44949367c0c478ee8f87c1f6bf (diff) | |
| download | go-c8df1410d50f69b50eb5e643d15b6a3aab0ada06.tar.xz | |
cmd/compile: on arm64 pair a load with a load in a subsequent block
Look into the following block(s) for a load that can be paired with
the load we're trying to pair up.
This particularly helps the generated equality functions. Instead of doing:
MOVD x(R0), R2
MOVD x(R1), R3
CMP R2, R3
BNE noteq
MOVD x+8(R0), R2
MOVD x+8(R1), R3
CMP R2, R3
BNE noteq
we do:
LDP x(R0), (R2, R4)
LDP x(R1), (R3, R5)
CMP R2, R3
BNE noteq
CMP R4, R5
BNE noteq
Removes 5296 bytes of code from cmd/go.
Change-Id: I6368686892ac944783c8b07ed7252126d1ef4031
Reviewed-on: https://go-review.googlesource.com/c/go/+/740741
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd')
| -rw-r--r-- | src/cmd/compile/internal/ssa/pair.go | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/pair.go b/src/cmd/compile/internal/ssa/pair.go index 83d7e476dd..7595fdb04c 100644 --- a/src/cmd/compile/internal/ssa/pair.go +++ b/src/cmd/compile/internal/ssa/pair.go @@ -7,6 +7,7 @@ package ssa import ( "cmd/compile/internal/ir" "cmd/compile/internal/types" + "cmd/internal/obj" "slices" ) @@ -206,6 +207,117 @@ func pairLoads(f *Func) { i++ // Skip y next time around the loop. } } + + // Try to pair a load with a load from a subsequent block. + // Note that this is always safe to do if the memory arguments match. + // (But see the memory barrier case below.) + type nextBlockKey struct { + op Op + ptr ID + mem ID + auxInt int64 + aux any + } + nextBlock := map[nextBlockKey]*Value{} + for _, b := range f.Blocks { + if memoryBarrierTest(b) { + // TODO: Do we really need to skip write barrier test blocks? + // type T struct { + // a *byte + // b int + // } + // func f(t *T) int { + // r := t.b + // t.a = nil + // return r + // } + // This would issue a single LDP for both the t.a and t.b fields, + // *before* we check the write barrier flag. (We load the t.a field + // to put it in the write barrier buffer.) Not sure if that is ok. + continue + } + // Find loads in the next block(s) that we can move to this one. + // TODO: could maybe look further than just one successor hop. + clear(nextBlock) + for _, e := range b.Succs { + if len(e.b.Preds) > 1 { + continue + } + for _, v := range e.b.Values { + info := pairableLoads[v.Op] + if info.width == 0 { + continue + } + if !offsetOk(v.Aux, v.AuxInt, info.width) { + continue // not advisable + } + nextBlock[nextBlockKey{op: v.Op, ptr: v.Args[0].ID, mem: v.Args[1].ID, auxInt: v.AuxInt, aux: v.Aux}] = v + } + } + if len(nextBlock) == 0 { + continue + } + // don't move too many loads. Each requires a register across a basic block boundary. 
+ const maxMoved = 4 + nMoved := 0 + for i := len(b.Values) - 1; i >= 0 && nMoved < maxMoved; i-- { + x := b.Values[i] + info := pairableLoads[x.Op] + if info.width == 0 { + continue + } + if !offsetOk(x.Aux, x.AuxInt, info.width) { + continue // not advisable + } + key := nextBlockKey{op: x.Op, ptr: x.Args[0].ID, mem: x.Args[1].ID, auxInt: x.AuxInt + info.width, aux: x.Aux} + if y := nextBlock[key]; y != nil { + delete(nextBlock, key) + + // Make the 2-register load. + load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1]) + + // Modify x to be (Select0 load). + x.reset(OpSelect0) + x.SetArgs1(load) + // Modify y to be (Copy (Select1 load)). + // Note: the Select* needs to live in the load's block, not y's block. + y.reset(OpCopy) + y.SetArgs1(b.NewValue1(y.Pos, OpSelect1, y.Type, load)) + nMoved++ + continue + } + key.auxInt = x.AuxInt - info.width + if y := nextBlock[key]; y != nil { + delete(nextBlock, key) + + // Make the 2-register load. + load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(y.Type, x.Type), y.AuxInt, x.Aux, x.Args[0], x.Args[1]) + + // Modify x to be (Select1 load). + x.reset(OpSelect1) + x.SetArgs1(load) + // Modify y to be (Copy (Select0 load)). + y.reset(OpCopy) + y.SetArgs1(b.NewValue1(y.Pos, OpSelect0, y.Type, load)) + nMoved++ + continue + } + } + } +} + +func memoryBarrierTest(b *Block) bool { + if b.Kind != BlockARM64NZW { + return false + } + c := b.Controls[0] + if c.Op != OpARM64MOVWUload { + return false + } + if globl, ok := c.Aux.(*obj.LSym); ok { + return globl.Name == "runtime.writeBarrier" + } + return false } func pairStores(f *Func) { |
