diff options
| author | Jayanth Krishnamurthy jayanth.krishnamurthy@ibm.com <jayanth.krishnamurthy@ibm.com> | 2025-09-15 17:33:42 -0500 |
|---|---|---|
| committer | Archana Ravindar <aravinda@redhat.com> | 2026-04-06 01:13:27 -0700 |
| commit | d74de3ce79d4ab3495650bfcc4682cab09514b89 (patch) | |
| tree | 46f97583b1b6c0664b7335807cf202b7a4128a2f /src/cmd/compile/internal/ssa | |
| parent | 081aa64e610b175e295159c2117f25ecf49953ed (diff) | |
| download | go-d74de3ce79d4ab3495650bfcc4682cab09514b89.tar.xz | |
cmd/compile: improve uint8/uint16 logical immediates on PPC64
Logical ops on uint8/uint16 (AND/OR/XOR) with constants sometimes
materialized the mask via MOVD (often as a negative immediate), even
when the value fit in the UI-immediate range. This prevented the backend
from selecting andi. / ori / xori forms.
With this CL, UI-immediate truncation is performed only at the use-site of
logical-immediate ops, and only when the constant does not already fit in the
8- or 16-bit unsigned domain (m != int64(uint8(m)) / m != int64(uint16(m))).
This avoids negative-mask materialization and enables correct emission of
UI-form logical instructions. Arithmetic SI-immediate instructions (addi, subfic, etc.) and other
use-patterns are unchanged.
Codegen tests are added to ensure the expected andi./ori/xori
patterns appear and that MOVD is not emitted for valid 8/16-bit masks.
Change-Id: I9fcdf4498c4e984c7587814fb9019a75865c4a0d
Cq-Include-Trybots: luci.golang.try:gotip-linux-ppc64_power10,gotip-linux-ppc64_power8,gotip-linux-ppc64le_power8,gotip-linux-ppc64le_power9,gotip-linux-ppc64le_power10
Reviewed-on: https://go-review.googlesource.com/c/go/+/704015
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Paul Murphy <paumurph@redhat.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa')
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/PPC64.rules | 10 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules | 3 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewritePPC64.go | 126 | ||||
| -rw-r--r-- | src/cmd/compile/internal/ssa/rewritePPC64latelower.go | 22 |
4 files changed, 136 insertions, 25 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules index 6d40687264..0872648bf7 100644 --- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules @@ -142,6 +142,16 @@ (ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31]) (ROTL x (MOVDconst [c])) => (ROTLconst x [c&63]) +// uint8: logical ops with constant -> UI immediates (only if truncation needed). +(AND <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) => (ANDconst [int64(uint8(m))] x) +(OR <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) => (ORconst [int64(uint8(m))] x) +(XOR <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) => (XORconst [int64(uint8(m))] x) + +// uint16: logical ops with constant -> UI immediates (only if truncation needed). +(AND <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) => (ANDconst [int64(uint16(m))] x) +(OR <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) => (ORconst [int64(uint16(m))] x) +(XOR <t> x (MOVDconst [m])) && t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) => (XORconst [int64(uint16(m))] x) + // Combine rotate and mask operations (ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) (AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x) diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules b/src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules index 15e6f72519..cd60e79a9d 100644 --- a/src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules +++ b/src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules @@ -18,9 +18,6 @@ (SETBC [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [1] (MOVDconst [1]) cmp) (SETBCR [1] cmp) && buildcfg.GOPPC64 <= 9 => (ISELZ [5] (MOVDconst [1]) cmp) -// The 
upper bits of the smaller than register values is undefined. Take advantage of that. -(AND <t> x:(MOVDconst [m]) n) && t.Size() <= 2 => (ANDconst [int64(int16(m))] n) - // Convert simple bit masks to an equivalent rldic[lr] if possible. (AND x:(MOVDconst [m]) n) && isPPC64ValidShiftMask(m) => (RLDICL [encodePPC64RotateMask(0,m,64)] n) (AND x:(MOVDconst [m]) n) && m != 0 && isPPC64ValidShiftMask(^m) => (RLDICR [encodePPC64RotateMask(0,m,64)] n) diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 6a7df42546..181494e669 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -4398,6 +4398,48 @@ func rewriteValuePPC64_OpPPC64ADDconst(v *Value) bool { func rewriteValuePPC64_OpPPC64AND(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (AND <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) + // result: (ANDconst [int64(uint8(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m))) { + continue + } + v.reset(OpPPC64ANDconst) + v.AuxInt = int64ToAuxInt(int64(uint8(m))) + v.AddArg(x) + return true + } + break + } + // match: (AND <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) + // result: (ANDconst [int64(uint16(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m))) { + continue + } + v.reset(OpPPC64ANDconst) + v.AuxInt = int64ToAuxInt(int64(uint16(m))) + v.AddArg(x) + return true + } + break + } // match: (AND (MOVDconst [m]) (ROTLWconst [r] x)) // cond: isPPC64WordRotateMask(m) // result: 
(RLWINM [encodePPC64RotateMask(r,m,32)] x) @@ -11739,6 +11781,48 @@ func rewriteValuePPC64_OpPPC64NotEqual(v *Value) bool { func rewriteValuePPC64_OpPPC64OR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (OR <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) + // result: (ORconst [int64(uint8(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m))) { + continue + } + v.reset(OpPPC64ORconst) + v.AuxInt = int64ToAuxInt(int64(uint8(m))) + v.AddArg(x) + return true + } + break + } + // match: (OR <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) + // result: (ORconst [int64(uint16(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m))) { + continue + } + v.reset(OpPPC64ORconst) + v.AuxInt = int64ToAuxInt(int64(uint16(m))) + v.AddArg(x) + return true + } + break + } // match: (OR x (NOR y y)) // result: (ORN x y) for { @@ -13082,6 +13166,48 @@ func rewriteValuePPC64_OpPPC64SUBFCconst(v *Value) bool { func rewriteValuePPC64_OpPPC64XOR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (XOR <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m)) + // result: (XORconst [int64(uint8(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 1 && m != int64(uint8(m))) { + continue + } + v.reset(OpPPC64XORconst) + v.AuxInt = int64ToAuxInt(int64(uint8(m))) + v.AddArg(x) + return true + } + break + } + // 
match: (XOR <t> x (MOVDconst [m])) + // cond: t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m)) + // result: (XORconst [int64(uint16(m))] x) + for { + t := v.Type + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_1.AuxInt) + if !(t.IsUnsigned() && t.Size() == 2 && m != int64(uint16(m))) { + continue + } + v.reset(OpPPC64XORconst) + v.AuxInt = int64ToAuxInt(int64(uint16(m))) + v.AddArg(x) + return true + } + break + } // match: (XOR (MOVDconst [c]) (MOVDconst [d])) // result: (MOVDconst [c^d]) for { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64latelower.go b/src/cmd/compile/internal/ssa/rewritePPC64latelower.go index 18c05280c0..011f66c769 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64latelower.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64latelower.go @@ -55,28 +55,6 @@ func rewriteValuePPC64latelower_OpPPC64ADD(v *Value) bool { func rewriteValuePPC64latelower_OpPPC64AND(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AND <t> x:(MOVDconst [m]) n) - // cond: t.Size() <= 2 - // result: (ANDconst [int64(int16(m))] n) - for { - t := v.Type - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if x.Op != OpPPC64MOVDconst { - continue - } - m := auxIntToInt64(x.AuxInt) - n := v_1 - if !(t.Size() <= 2) { - continue - } - v.reset(OpPPC64ANDconst) - v.AuxInt = int64ToAuxInt(int64(int16(m))) - v.AddArg(n) - return true - } - break - } // match: (AND x:(MOVDconst [m]) n) // cond: isPPC64ValidShiftMask(m) // result: (RLDICL [encodePPC64RotateMask(0,m,64)] n) |
