From 8e60feeb41bd5212ebdcec3e7769116cb4a2d4f8 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 17 Sep 2025 17:21:37 -0400 Subject: [dev.simd] cmd/compile: improve slicemask removal this will be subsumed by pending changes in local slice representation, however this was easy and works well. Change-Id: I5b6eb10d257f04f906be7a8a6f2b6833992a39e8 Reviewed-on: https://go-review.googlesource.com/c/go/+/704876 Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/prove.go | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'src/cmd/compile') diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index 5ed5be4744..b4f91fd4fd 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -2529,24 +2529,38 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { switch v.Op { case OpSlicemask: // Replace OpSlicemask operations in b with constants where possible. - x, delta := isConstDelta(v.Args[0]) - if x == nil { + cap := v.Args[0] + x, delta := isConstDelta(cap) + if x != nil { + // slicemask(x + y) + // if x is larger than -y (y is negative), then slicemask is -1. + lim := ft.limits[x.ID] + if lim.umin > uint64(-delta) { + if cap.Op == OpAdd64 { + v.reset(OpConst64) + } else { + v.reset(OpConst32) + } + if b.Func.pass.debug > 0 { + b.Func.Warnl(v.Pos, "Proved slicemask not needed") + } + v.AuxInt = -1 + } break } - // slicemask(x + y) - // if x is larger than -y (y is negative), then slicemask is -1. - lim := ft.limits[x.ID] - if lim.umin > uint64(-delta) { - if v.Args[0].Op == OpAdd64 { + lim := ft.limits[cap.ID] + if lim.umin > 0 { + if cap.Type.Size() == 8 { v.reset(OpConst64) } else { v.reset(OpConst32) } if b.Func.pass.debug > 0 { - b.Func.Warnl(v.Pos, "Proved slicemask not needed") + b.Func.Warnl(v.Pos, "Proved slicemask not needed (by limit)") } v.AuxInt = -1 } + case OpCtz8, OpCtz16, OpCtz32, OpCtz64: // On some architectures, notably amd64, we can generate much better // code for CtzNN if we know that the argument is non-zero. -- cgit v1.3-5-g9baa