diff options
| author | Melnikov Denis <melnikov.denis.aleksandrovich@gmail.com> | 2026-03-27 13:15:45 +0300 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-04-08 14:21:41 -0700 |
| commit | 996b985008c7615004c0dbe8b031928faff3c993 (patch) | |
| tree | 0e2d8cd21471b636f7ddc31b07419c93a3c9bc6e | |
| parent | e67d773034fde21e6f726c4add5eeba5882198ae (diff) | |
| download | go-996b985008c7615004c0dbe8b031928faff3c993.tar.xz | |
cmd/compile: improve stp merging for non-sequent cases
Original algorithm merges stores with the first
mergeable store in the chain, but it misses some
cases. Additionally, creating list of STs, which
store data to adjacent memory cells allows merging them
according to the direction of increase of their addresses.
I have already tried another algorithm in CL 698097,
but it was reverted. This algorithm works differently
and fixes bug, generated by variant from another CL.
Fixes #71987, #75365
There are the results of sweet benchmarks
│ base.stat │ opt.stat │
│ sec/op │ sec/op vs base │
ESBuildThreeJS-4 1.088 ± 2% 1.086 ± 1% ~ (p=1.000 n=10)
ESBuildRomeTS-4 263.0m ± 2% 260.8m ± 1% ~ (p=0.105 n=10)
EtcdPut-4 73.08m ± 1% 73.16m ± 1% ~ (p=0.971 n=10)
EtcdSTM-4 414.9m ± 1% 415.4m ± 1% ~ (p=0.393 n=10)
GoBuildKubelet-4 203.3 ± 0% 203.5 ± 0% ~ (p=0.393 n=10)
GoBuildKubeletLink-4 19.06 ± 1% 19.05 ± 0% ~ (p=0.280 n=10)
GoBuildIstioctl-4 156.6 ± 0% 156.6 ± 0% ~ (p=0.796 n=10)
GoBuildIstioctlLink-4 14.16 ± 1% 14.18 ± 1% ~ (p=0.853 n=10)
GoBuildFrontend-4 56.45 ± 1% 56.57 ± 0% ~ (p=0.579 n=10)
GoBuildFrontendLink-4 3.635 ± 1% 3.646 ± 0% ~ (p=0.436 n=10)
GoBuildTsgo-4 103.0 ± 1% 103.4 ± 1% ~ (p=0.529 n=10)
GoBuildTsgoLink-4 1.865 ± 1% 1.860 ± 1% ~ (p=0.684 n=10)
GopherLuaKNucleotide-4 33.55 ± 0% 33.58 ± 0% ~ (p=0.075 n=10)
MarkdownRenderXHTML-4 281.0m ± 0% 280.3m ± 0% -0.23% (p=0.019 n=10)
Tile38QueryLoad-4 970.0µ ± 1% 969.3µ ± 0% ~ (p=0.436 n=10)
geomean 3.128 3.128 -0.01%
Change-Id: Ia548b43601b1bdb1c1723d300a4b8b907ab0c040
Reviewed-on: https://go-review.googlesource.com/c/go/+/760100
Reviewed-by: Mark Freeman <markfreeman@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
| -rw-r--r-- | src/cmd/compile/internal/ssa/pair.go | 213 | ||||
| -rw-r--r-- | test/codegen/memcombine.go | 12 | ||||
| -rw-r--r-- | test/fixedbugs/issue75365.go | 30 | ||||
| -rw-r--r-- | test/fixedbugs/issue75365.out | 1 |
4 files changed, 156 insertions, 100 deletions
diff --git a/src/cmd/compile/internal/ssa/pair.go b/src/cmd/compile/internal/ssa/pair.go index 7595fdb04c..be524dc0a6 100644 --- a/src/cmd/compile/internal/ssa/pair.go +++ b/src/cmd/compile/internal/ssa/pair.go @@ -320,10 +320,70 @@ func memoryBarrierTest(b *Block) bool { return false } +// pairStores merges store instructions. +// It collects stores into a buffer where they can be freely reordered. +// When encountering an instruction that cannot be added to the buffer, +// it pairs the accumulated stores, flushes the buffer, and continues processing. func pairStores(f *Func) { last := f.Cache.allocBoolSlice(f.NumValues()) defer f.Cache.freeBoolSlice(last) + // memChain contains a list of stores with the same ptr/aux pair and + // nonoverlapping write ranges [AuxInt:AuxInt+writeSize]. All of the + // elements of memChain can be reordered with each other. + memChain := []*Value{} + + // Limit of length of memChain array. + // This keeps us in O(n) territory. + limit := 100 + + // flushMemChain sorts the stores in memChain and merges them when possible. + // Then it flushes memChain. + flushMemChain := func() { + if len(memChain) < 2 { + memChain = memChain[:0] + return + } + + // Sort in increasing AuxInt to put pairable stores together. + slices.SortFunc(memChain, func(x, y *Value) int { + return int(x.AuxInt - y.AuxInt) + }) + + lastIdx := len(memChain) - 1 + for i := 0; i < lastIdx; i++ { + v := memChain[i] + w := memChain[i+1] + info := pairableStores[v.Op] + + off := v.AuxInt + mem := v.MemoryArg() + aux := v.Aux + pos := v.Pos + wmem := w.MemoryArg() + + if w.Op == v.Op && w.AuxInt == off+info.width { + // Arguments for the merged store: ptr, val1, val2, mem. + args := []*Value{v.Args[0], v.Args[1], w.Args[1], mem} + + v.reset(info.pair) + v.AddArgs(args...) + v.Aux = aux + v.AuxInt = off + v.Pos = pos + + // Make w just a memory copy. + w.reset(OpCopy) + w.SetArgs1(wmem) + + // Skip merged store (w) + i++ + } + } + + memChain = memChain[:0] + } + // prevStore returns the previous store in the // same block, or nil if there are none. prevStore := func(v *Value) *Value { @@ -337,7 +397,28 @@ func pairStores(f *Func) { return m } + // storeWidth returns the width of store, + // or 0 if it is not a store this pass understands. + storeWidth := func(op Op) int64 { + var width int64 + switch op { + case OpARM64MOVDstore, OpARM64FMOVDstore: + width = 8 + case OpARM64MOVWstore, OpARM64FMOVSstore: + width = 4 + case OpARM64MOVHstore: + width = 2 + case OpARM64MOVBstore: + width = 1 + default: + width = 0 + } + return width + } + for _, b := range f.Blocks { + memChain = memChain[:0] + // Find last store in block, so we can // walk the stores last to first. // Last to first helps ensure that the rewrites we @@ -362,110 +443,46 @@ func pairStores(f *Func) { } } - // Check all stores, from last to first. - memCheck: + // Iterate over memory stores, accumulating them in memChain for potential merging. + // Flush the chain when reordering is unsafe or a conflict is detected. for v := lastMem; v != nil; v = prevStore(v) { - info := pairableStores[v.Op] - if info.width == 0 { - continue // Not pairable. + writeSize := storeWidth(v.Op) + + if writeSize == 0 { + // We can't reorder stores with calls or other instructions + // with writeSize == 0. + flushMemChain() + continue } - if !offsetOk(v.Aux, v.AuxInt, info.width) { - continue // Not advisable to pair. + if v.Uses != 1 && len(memChain) > 0 || + len(memChain) > 0 && (v.Args[0] != memChain[0].Args[0] || v.Aux != memChain[0].Aux) || + len(memChain) == limit { + // If v has multiple uses and it is not the latest store in the chain, + // we cannot merge it with other store instructions. + // If v has a different base pointer or Aux value from the current chain, + // we need to flush memChain and start a new one with v. + // If memChain length limit is exceeded, we also need to flush the chain + // and start a new one with v. + // Only look back so far. + // This keeps us in O(n) territory, and it + // also prevents us from keeping values + // in registers for too long (and thus + // needing to spill them). + flushMemChain() } - ptr := v.Args[0] - val := v.Args[1] - mem := v.Args[2] - off := v.AuxInt - aux := v.Aux - // Look for earlier store we can combine with. - lowerOk := true - higherOk := true - count := 10 // max lookback distance - for w := prevStore(v); w != nil; w = prevStore(w) { - if w.Uses != 1 { - // We can't combine stores if the earlier - // store has any use besides the next one - // in the store chain. - // (Unless we could check the aliasing of - // all those other uses.) - continue memCheck - } - if w.Op == v.Op && - w.Args[0] == ptr && - w.Aux == aux && - (lowerOk && w.AuxInt == off-info.width || higherOk && w.AuxInt == off+info.width) { - // This op is mergeable with v. - - // Commit point. - - // ptr val1 val2 mem - args := []*Value{ptr, val, w.Args[1], mem} - if w.AuxInt == off-info.width { - args[1], args[2] = args[2], args[1] - off -= info.width - } - v.reset(info.pair) - v.AddArgs(args...) - v.Aux = aux - v.AuxInt = off - v.Pos = w.Pos // take position of earlier of the two stores (TODO: not really working?) - - // Make w just a memory copy. - wmem := w.MemoryArg() - w.reset(OpCopy) - w.SetArgs1(wmem) - continue memCheck - } - if count--; count == 0 { - // Only look back so far. - // This keeps us in O(n) territory, and it - // also prevents us from keeping values - // in registers for too long (and thus - // needing to spill them). - continue memCheck - } - // We're now looking at a store w which is currently - // between the store v that we're intending to merge into, - // and the store we'll eventually find to merge with it. - // Make sure this store doesn't alias with the one - // we'll be moving. - var width int64 - switch w.Op { - case OpARM64MOVDstore, OpARM64FMOVDstore: - width = 8 - case OpARM64MOVWstore, OpARM64FMOVSstore: - width = 4 - case OpARM64MOVHstore: - width = 2 - case OpARM64MOVBstore: - width = 1 - case OpCopy: - continue // this was a store we merged earlier - default: - // Can't reorder with any other memory operations. - // (atomics, calls, ...) - continue memCheck - } - - // We only allow reordering with respect to other - // writes to the same pointer and aux, so we can - // compute the exact the aliasing relationship. - if w.Args[0] != ptr || w.Aux != aux { - continue memCheck - } - if overlap(w.AuxInt, width, off-info.width, info.width) { - // Aliases with slot before v's location. - lowerOk = false - } - if overlap(w.AuxInt, width, off+info.width, info.width) { - // Aliases with slot after v's location. - higherOk = false - } - if !higherOk && !lowerOk { - continue memCheck + for _, w := range memChain { + wWriteSize := storeWidth(w.Op) + if overlap(w.AuxInt, wWriteSize, v.AuxInt, writeSize) { + // Aliases with w's location. + // Flush the chain and start a new one with v. + flushMemChain() + break } } + + memChain = append(memChain, v) } + flushMemChain() } } diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index f2f57fffe4..55af978707 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -1062,17 +1062,25 @@ func dwstoreF32(p *struct{ a, b float32 }, x, y float32) { } func dwstoreBig(p *struct{ a, b, c, d, e, f int64 }, a, b, c, d, e, f int64) { - // This is not perfect. We merge b+a, then d+e, then c and f have no pair. + // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 16\(R[0-9]+\)` p.c = c p.f = f // arm64:`STP \(R[0-9]+, R[0-9]+\), \(R[0-9]+\)` p.a = a - // arm64:`STP \(R[0-9]+, R[0-9]+\), 24\(R[0-9]+\)` + // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 32\(R[0-9]+\)` p.e = e p.d = d p.b = b } +func dwstoreBigNil(p *struct{ i, j struct{ a, b, c int } }) { + // arm64:`STP\s\(ZR, ZR\), 32\(R[0-9]+\)` + p.j = struct{ a, b, c int }{} + // arm64:`STP\s\(ZR, ZR\), \(R[0-9]+\)` + // arm64:`STP\s\(ZR, ZR\), 16\(R[0-9]+\)` + p.i = struct{ a, b, c int }{} +} + func dwstoreRet() [2]int { // arm64:"STP " return [2]int{5, 6} diff --git a/test/fixedbugs/issue75365.go b/test/fixedbugs/issue75365.go new file mode 100644 index 0000000000..3748fc0c22 --- /dev/null +++ b/test/fixedbugs/issue75365.go @@ -0,0 +1,30 @@ +// run + +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "unsafe" +) + +type S struct { + p *byte + a string + b string + c int64 + d int64 +} + +func main() { + s := &S{p: nil, a: "foo", b: "foo", c: 0, d: 0} + s.a = "" + s.b = "bar" + s.c = 33 + + z := (*[2]uintptr)(unsafe.Pointer(&s.a)) + fmt.Printf("%x %x\n", z[0], z[1]) +} diff --git a/test/fixedbugs/issue75365.out b/test/fixedbugs/issue75365.out new file mode 100644 index 0000000000..b748e2dcfc --- /dev/null +++ b/test/fixedbugs/issue75365.out @@ -0,0 +1 @@ +0 0 |
